Skip to content

Allowing Http/Https files as input #507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 10, 2017
44 changes: 38 additions & 6 deletions cwltool/pathmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
import stat
import uuid
from functools import partial
from tempfile import NamedTemporaryFile

import requests
from cachecontrol import CacheControl
from cachecontrol.caches import FileCache
from typing import Any, Callable, Dict, Iterable, List, Set, Text, Tuple, Union

import schema_salad.validate as validate
Expand Down Expand Up @@ -139,6 +144,29 @@ def trim_listing(obj):
if obj.get("location", "").startswith("file://") and "listing" in obj:
del obj["listing"]

# Download http Files
def downloadHttpFile(httpurl):
    # type: (Text) -> Text
    """Download ``httpurl`` into a local temporary file and return its path.

    The request goes through a ``CacheControl`` session backed by an
    on-disk ``FileCache``. The body is streamed to disk in 16 KiB chunks
    instead of being read into memory in one piece.

    The temporary file is created with ``delete=False``, so it still exists
    after this function returns; removing it is left to the caller.

    :param httpurl: the http:// or https:// URL to fetch.
    :returns: filesystem path of the temporary file holding the body.
    :raises requests.exceptions.HTTPError: if the server answers with a
        4xx/5xx status, so an error page is never saved as the input file.
    """
    # Pick the base directory for the on-disk HTTP cache.
    if "XDG_CACHE_HOME" in os.environ:
        directory = os.environ["XDG_CACHE_HOME"]
    elif "HOME" in os.environ:
        directory = os.environ["HOME"]
    else:
        directory = os.path.expanduser('~')

    # NOTE(review): ".cache" is appended even when XDG_CACHE_HOME is set, so
    # the cache then lives in $XDG_CACHE_HOME/.cache/cwltool. Kept as-is so
    # an existing cache location does not move.
    cache_session = CacheControl(
        requests.Session(),
        cache=FileCache(
            os.path.join(directory, ".cache", "cwltool")))

    r = cache_session.get(httpurl, stream=True)
    try:
        # Fail loudly on HTTP errors instead of writing the error body out.
        r.raise_for_status()
        with NamedTemporaryFile(mode='wb', delete=False) as f:
            for chunk in r.iter_content(chunk_size=16384):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Close the streamed response even if the download or write fails.
        r.close()
    return f.name

class PathMapper(object):
"""Mapping of files from relative path provided in the file to a tuple of
Expand Down Expand Up @@ -208,14 +236,18 @@ def visit(self, obj, stagedir, basedir, copy=False, staged=False):
self._pathmap[obj["location"]] = MapperEnt(obj["contents"], tgt, "CreateFile", staged)
else:
with SourceLine(obj, "location", validate.ValidationException):
# Dereference symbolic links
deref = ab
st = os.lstat(deref)
while stat.S_ISLNK(st.st_mode):
rl = os.readlink(deref)
deref = rl if os.path.isabs(rl) else os.path.join(
os.path.dirname(deref), rl)
if urllib.parse.urlsplit(deref).scheme in ['http','https']:
deref = downloadHttpFile(path)
else:
# Dereference symbolic links
st = os.lstat(deref)
while stat.S_ISLNK(st.st_mode):
rl = os.readlink(deref)
deref = rl if os.path.isabs(rl) else os.path.join(
os.path.dirname(deref), rl)
st = os.lstat(deref)

self._pathmap[path] = MapperEnt(deref, tgt, "WritableFile" if copy else "File", staged)
self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir, copy=copy, staged=staged)

Expand Down
2 changes: 2 additions & 0 deletions cwltool/stdfsaccess.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
def abspath(src, basedir): # type: (Text, Text) -> Text
if src.startswith(u"file://"):
ab = six.text_type(uri_file_path(str(src)))
elif urllib.parse.urlsplit(src).scheme in ['http','https']:
return src
else:
if basedir.startswith(u"file://"):
ab = src if os.path.isabs(src) else basedir+ '/'+ src
Expand Down
26 changes: 26 additions & 0 deletions tests/test_http_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import absolute_import
import unittest
import os
import tempfile
from cwltool.pathmapper import PathMapper


class TestHttpInput(unittest.TestCase):
    """Check that ``PathMapper`` accepts a File whose location is an https URL."""

    def test_http_path_mapping(self):
        """Map an https File and verify that the resolved local copy exists.

        The location is a real https:// URL, so this test needs network
        access to run.
        """
        # Local import: only this test needs shutil, for staging-dir cleanup.
        import shutil

        input_file_path = "https://github.com/raw/common-workflow-language/cwltool/master/tests/2.fasta"
        tempdir = tempfile.mkdtemp()
        # Remove the staging directory whether the test passes or fails.
        self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)
        base_file = [{
            "class": "File",
            "location": input_file_path,
            "basename": "chr20.fa"
        }]
        path_map_obj = PathMapper(base_file, os.getcwd(), tempdir)

        # The URL itself is the key in the path map.
        self.assertIn(input_file_path, path_map_obj._pathmap)
        resolved = path_map_obj._pathmap[input_file_path].resolved
        # unittest assertion rather than a bare assert, which -O strips.
        self.assertTrue(os.path.exists(resolved))
        with open(resolved) as f:
            self.assertIn(">Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;", f.read())