From f9faff289e2b13b8ca9c9ab69627c3297f2121c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20M=C3=A9nager?= Date: Fri, 17 Jun 2016 14:13:00 +0200 Subject: [PATCH 1/4] ignore pycharm files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index ef302c113..523c0d55b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ eggs/ *~ \#*\# .desktop +.idea + From b9d01d16157fa21e2ac04e240907b96aeb5e6ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20M=C3=A9nager?= Date: Fri, 17 Jun 2016 14:15:02 +0200 Subject: [PATCH 2/4] first take at providing a cache mechanism for imported schemas This adds a --cache-net-dir option to the tool, which is a path to a cache directory that will be used to store the requested schemas on disk if specified. It works by adding a handler to the urllib2 library that handles the opening of HTTP URLs and first checks whether they are stored in the directory: - if present, the contents are read directly from the stored file - if not present, the HTTP request is actually issued and the contents are stored on file as well as sent back to the user. 
--- cwltool/main.py | 7 ++++- cwltool/net_cache.py | 63 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 cwltool/net_cache.py diff --git a/cwltool/main.py b/cwltool/main.py index fc0e814d0..62fd7cbb9 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -15,6 +15,7 @@ from .cwlrdf import printrdf, printdot from .process import shortname, Process from .load_tool import fetch_document, validate_document, make_tool +from .net_cache import set_cache import schema_salad.validate as validate import tempfile import schema_salad.jsonld_context @@ -148,6 +149,9 @@ def arg_parser(): # type: () -> argparse.ArgumentParser parser.add_argument("workflow", type=str, nargs="?", default=None) parser.add_argument("job_order", nargs=argparse.REMAINDER) + exgroup.add_argument("--cache-net-dir", type=str, default=None, + help="Cache directory to use for HTTP resources (e.g., schemas).") + return parser @@ -569,12 +573,13 @@ def main(argsl=None, return 1 try: + if args.cache_net_dir is not None: + set_cache(args.cache_net_dir) document_loader, workflowobj, uri = fetch_document(args.workflow) if args.print_deps: printdeps(workflowobj, document_loader, stdout, args.relative_deps) return 0 - document_loader, avsc_names, processobj, metadata, uri \ = validate_document(document_loader, workflowobj, uri, enable_dev=args.enable_dev, strict=args.strict, diff --git a/cwltool/net_cache.py b/cwltool/net_cache.py new file mode 100644 index 000000000..35e0d3cc0 --- /dev/null +++ b/cwltool/net_cache.py @@ -0,0 +1,63 @@ +import urllib2 +import hashlib +import urlparse, urllib +import os + +cache_folder = None + +def path2url(path): + return urlparse.urljoin( + 'file:', urllib.pathname2url(path)) + +def set_cache(new_cache_folder): + global cache_folder + cached_handler = CachedHTTPHandler() + opener = urllib2.build_opener(cached_handler) + urllib2.install_opener(opener) + cache_folder = new_cache_folder + if 
not(os.path.exists(cache_folder)): + os.mkdir(cache_folder) + + +class CachedHTTPHandler(urllib2.AbstractHTTPHandler): + + handler_order = 100 + + def http_open(self, req): + cache_path = self._get_local_cachefile_path(req.get_full_url()) + cache_url = self._get_local_cachefile_url(req.get_full_url()) + if os.path.exists(cache_path+'_redirect'): + redirected_url = open(cache_path+'_redirect','r').read() + cache_path = self._get_local_cachefile_path(redirected_url) + cache_url = self._get_local_cachefile_url(redirected_url) + if not(os.path.exists(cache_path)): + response = urllib2.HTTPHandler().http_open(req) + code = response.code + headers = response.headers + if code==200: + open(cache_path, 'w').write(response.read()) + else: + if (code in (301, 302, 303, 307)): + if 'location' in headers: + newurl = headers.getheaders('location')[0] + elif 'uri' in headers: + newurl = headers.getheaders('uri')[0] + open(cache_path+'_redirect', 'w').write(newurl) + print newurl + #os.link(cache_path, self._get_local_cachefile_path(newurl)) + return response + response = urllib2.FileHandler().file_open(urllib2.Request(cache_url)) + response.code = 200 + response.msg = "everything is ok" + return response + + http_request = urllib2.AbstractHTTPHandler.do_request_ + + def _get_local_cachefile_name(self, url): + return hashlib.md5(url).hexdigest() + + def _get_local_cachefile_path(self, url): + return os.path.abspath(os.path.join(cache_folder, self._get_local_cachefile_name(url))) + + def _get_local_cachefile_url(self, url): + return path2url(self._get_local_cachefile_path(url)) \ No newline at end of file From 678c6f81d82a9b00ae4f0325533d9a95350bf916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20M=C3=A9nager?= Date: Fri, 17 Jun 2016 14:25:11 +0200 Subject: [PATCH 3/4] rename net_cache to urllib2_cache which is more descriptive. 
--- cwltool/main.py | 2 +- cwltool/{net_cache.py => urllib2_cache.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cwltool/{net_cache.py => urllib2_cache.py} (100%) diff --git a/cwltool/main.py b/cwltool/main.py index 62fd7cbb9..dded1541c 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -15,7 +15,7 @@ from .cwlrdf import printrdf, printdot from .process import shortname, Process from .load_tool import fetch_document, validate_document, make_tool -from .net_cache import set_cache +from .urllib2_cache import set_cache import schema_salad.validate as validate import tempfile import schema_salad.jsonld_context diff --git a/cwltool/net_cache.py b/cwltool/urllib2_cache.py similarity index 100% rename from cwltool/net_cache.py rename to cwltool/urllib2_cache.py From 28a2266c90a047195990c2584ceab1b0bdaaf38a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20M=C3=A9nager?= Date: Fri, 17 Jun 2016 16:17:21 +0200 Subject: [PATCH 4/4] typo correction in error message --- cwltool/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cwltool/process.py b/cwltool/process.py index e742db45f..e5775438f 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -288,13 +288,13 @@ def __init__(self, toolpath_object, **kwargs): self.inputs_record_schema = schema_salad.schema.make_valid_avro(self.inputs_record_schema, {}, set()) avro.schema.make_avsc_object(self.inputs_record_schema, self.names) except avro.schema.SchemaParseException as e: - raise validate.ValidationException(u"Got error `%s` while prcoessing inputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4))) + raise validate.ValidationException(u"Got error `%s` while processing inputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.inputs_record_schema, indent=4))) try: self.outputs_record_schema = schema_salad.schema.make_valid_avro(self.outputs_record_schema, {}, set()) avro.schema.make_avsc_object(self.outputs_record_schema, 
self.names) except avro.schema.SchemaParseException as e: - raise validate.ValidationException(u"Got error `%s` while prcoessing outputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4))) + raise validate.ValidationException(u"Got error `%s` while processing outputs of %s:\n%s" % (str(e), self.tool["id"], json.dumps(self.outputs_record_schema, indent=4))) def _init_job(self, joborder, **kwargs):