Skip to content

Commit 6ecf9cc

Browse files
authored
Eliminate dirent (#112)
* Recover eliminate dirent work. * Squashed 'cwltool/schemas/' changes from afe31c1..b40896c b40896c Fix basename. 34d946e Make location optional to support File contents literals. f69576a Remove mapSubject entryname. 7d002ad Fix field name conflict. d873510 Use file content literals for InitialWorkDir. 3fb9340 Get rid of Dirent. Update spec for basename. Add contents field to File spec. d5b2627 Fix ttl and context generation 01cf6db Site generation works again 7302ecc Make "location" field optional when "listing" is provided. 63dcbdc Merge branch 'master' of github.com:common-workflow-language/common-workflow-language 09bd8ef Updating site generation for draft-4. def6e52 Merge commit '6266928eac323672c0c44d16dca91d808fbc5e68' 6266928 Squashed 'draft-4/salad/' changes from 9c8c36f..c509a07 6f9cb1c fix the draft-4 tests cb78824 Merge pull request #236 from common-workflow-language/directory fcc5374 Merge remote-tracking branch 'origin/master' into directory a90b71f replace errant draft-4.dev3 with dev2 11b96f1 Merge pull request #242 from common-workflow-language/scatter-inputs 93b60d9 Merge remote-tracking branch 'origin/master' into scatter-inputs 9caa2cf Merge pull request #240 from common-workflow-language/dependency_hint db4113b Update text to say that 'inputs' on scatter is the post-scatter input object. 9ba0d38 Add test using value of inputs post-scatter. cae0407 Add test staging files into output directory for update. 972e3b7 Merge pull request #232 from common-workflow-language/move-file-related-fields 6c6ab97 fix failing tests 0b12a24 soften description, allow for list of specs, add identifier 43b5fb9 Describe LooseDependencyHints. 7eeb70a reparent file specific fields from SchemaBase to Parameter git-subtree-dir: cwltool/schemas git-subtree-split: b40896c * Get cwltest from master. * Putting Dirent back in for InitialWorkDir * Squashed 'cwltool/schemas/' changes from b40896c..d5f2322 d5f2322 Restore Dirent for InitialWorkDir only. git-subtree-dir: cwltool/schemas git-subtree-split: d5f2322 * Squashed 'cwltool/schemas/' changes from d5f2322..95b7454 95b7454 Using Dirent. git-subtree-dir: cwltool/schemas git-subtree-split: 95b7454 * Allow entryname to be optional. * Squashed 'cwltool/schemas/' changes from 95b7454..c1cd55f c1cd55f Remove conflicting mapSubject/mapPredicate git-subtree-dir: cwltool/schemas git-subtree-split: c1cd55f * Fix support for file literals. * Add --on-error * update types to be consistent
1 parent e6af66f commit 6ecf9cc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1047
-857
lines changed

cwltool/builder.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Any, Union, AnyStr, Callable
77
from .errors import WorkflowException
88
from .stdfsaccess import StdFsAccess
9-
from .pathmapper import PathMapper, adjustFileObjs, adjustDirObjs
9+
from .pathmapper import PathMapper, adjustFileObjs, adjustDirObjs, normalizeFilesDirs
1010

1111
CONTENT_LIMIT = 64 * 1024
1212

@@ -31,7 +31,8 @@ def __init__(self): # type: () -> None
3131
self.resources = None # type: Dict[str, Union[int, str]]
3232
self.bindings = [] # type: List[Dict[str, Any]]
3333
self.timeout = None # type: int
34-
self.pathmapper = None # type: PathMapper
34+
self.pathmapper = None # type: PathMapper
35+
self.stagedir = None # type: unicode
3536

3637
def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
3738
# type: (Dict[unicode, Any], Any, List[int], List[int]) -> List[Dict[str, Any]]
@@ -112,7 +113,8 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
112113
if isinstance(sf, dict) or "$(" in sf or "${" in sf:
113114
secondary_eval = self.do_eval(sf, context=datum)
114115
if isinstance(secondary_eval, basestring):
115-
sfpath = {"location": secondary_eval, "class": "File"}
116+
sfpath = {"location": secondary_eval,
117+
"class": "File"}
116118
else:
117119
sfpath = secondary_eval
118120
else:
@@ -121,6 +123,7 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
121123
datum["secondaryFiles"].extend(sfpath)
122124
else:
123125
datum["secondaryFiles"].append(sfpath)
126+
normalizeFilesDirs(datum["secondaryFiles"])
124127

125128
def _capture_files(f):
126129
self.files.append(f)
@@ -187,8 +190,14 @@ def generate_arg(self, binding): # type: (Dict[str,Any]) -> List[str]
187190

188191
return [a for a in args if a is not None]
189192

190-
def do_eval(self, ex, context=None, pull_image=True):
191-
# type: (Dict[str,str], Any, bool) -> Any
193+
def do_eval(self, ex, context=None, pull_image=True, recursive=False):
194+
# type: (Union[Dict[str, str], unicode], Any, bool, bool) -> Any
195+
if recursive:
196+
if isinstance(ex, dict):
197+
return {k: self.do_eval(v, context, pull_image, recursive) for k,v in ex.iteritems()}
198+
if isinstance(ex, list):
199+
return [self.do_eval(v, context, pull_image, recursive) for v in ex]
200+
192201
return expression.do_eval(ex, self.job, self.requirements,
193202
self.outdir, self.tmpdir,
194203
self.resources,

cwltool/cwltest.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def compare(a, b): # type: (Any, Any) -> bool
5858
raise CompareFail(u"%s not in %s" % (json.dumps(i, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True)))
5959

6060
a = {k: v for k, v in a.iteritems()
61-
if k not in ("path", "location", "listing")}
61+
if k not in ("path", "location", "listing", "basename")}
6262
b = {k: v for k, v in b.iteritems()
63-
if k not in ("path", "location", "listing")}
63+
if k not in ("path", "location", "listing", "basename")}
6464

6565
if len(a) != len(b):
6666
raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True)))

cwltool/draft2tool.py

+61-25
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
import glob
77
import logging
88
import hashlib
9-
import random
109
import re
1110
import urlparse
1211
import tempfile
@@ -17,7 +16,7 @@
1716
import shellescape
1817
from typing import Callable, Any, Union, Generator, cast
1918

20-
from .process import Process, shortname, uniquename, getListing
19+
from .process import Process, shortname, uniquename, getListing, normalizeFilesDirs
2120
from .errors import WorkflowException
2221
from .utils import aslist
2322
from . import expression
@@ -50,7 +49,9 @@ def __init__(self): # type: () -> None
5049

5150
def run(self, **kwargs): # type: (**Any) -> None
5251
try:
53-
self.output_callback(self.builder.do_eval(self.script), "success")
52+
ev = self.builder.do_eval(self.script)
53+
normalizeFilesDirs(ev)
54+
self.output_callback(ev, "success")
5455
except Exception as e:
5556
_logger.warn(u"Failed to evaluate expression:\n%s", e, exc_info=(e if kwargs.get('debug') else False))
5657
self.output_callback({}, "permanentFail")
@@ -110,6 +111,17 @@ def run(self, **kwargs):
110111
self.cachebuilder, self.outdir),
111112
"success")
112113

114+
# map files to assigned path inside a container. We need to also explicitly
115+
# walk over input as implicit reassignment doesn't reach everything in builder.bindings
116+
def check_adjust(builder, f):
117+
# type: (Builder, Dict[str, Any]) -> Dict[str,Any]
118+
f["path"] = builder.pathmapper.mapper(f["location"])[1]
119+
f["dirname"], f["basename"] = os.path.split(f["path"])
120+
if f["class"] == "File":
121+
f["nameroot"], f["nameext"] = os.path.splitext(f["basename"])
122+
if not ACCEPTLIST_RE.match(f["basename"]):
123+
raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"]))
124+
return f
113125

114126
class CommandLineTool(Process):
115127
def __init__(self, toolpath_object, **kwargs):
@@ -120,7 +132,7 @@ def makeJobRunner(self): # type: () -> CommandLineJob
120132
return CommandLineJob()
121133

122134
def makePathMapper(self, reffiles, stagedir, **kwargs):
123-
# type: (Set[Any], unicode, **Any) -> PathMapper
135+
# type: (List[Any], unicode, **Any) -> PathMapper
124136
dockerReq, _ = self.get_requirement("DockerRequirement")
125137
try:
126138
return PathMapper(reffiles, kwargs["basedir"], stagedir)
@@ -141,8 +153,13 @@ def job(self, joborder, output_callback, **kwargs):
141153
cachebuilder = self._init_job(joborder, **cacheargs)
142154
cachebuilder.pathmapper = PathMapper(cachebuilder.files,
143155
kwargs["basedir"],
144-
cachebuilder.stagedir)
145-
156+
cachebuilder.stagedir,
157+
separateDirs=False)
158+
_check_adjust = partial(check_adjust, cachebuilder)
159+
adjustFileObjs(cachebuilder.files, _check_adjust)
160+
adjustFileObjs(cachebuilder.bindings, _check_adjust)
161+
adjustDirObjs(cachebuilder.files, _check_adjust)
162+
adjustDirObjs(cachebuilder.bindings, _check_adjust)
146163
cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings))
147164
(docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
148165
if docker_req and kwargs.get("use_container") is not False:
@@ -151,8 +168,9 @@ def job(self, joborder, output_callback, **kwargs):
151168
keydict = {u"cmdline": cmdline}
152169

153170
for _,f in cachebuilder.pathmapper.items():
154-
st = os.stat(f[0])
155-
keydict[f[0]] = [st.st_size, int(st.st_mtime * 1000)]
171+
if f.type == "File":
172+
st = os.stat(f.resolved)
173+
keydict[f.resolved] = [st.st_size, int(st.st_mtime * 1000)]
156174

157175
interesting = {"DockerRequirement",
158176
"EnvVarRequirement",
@@ -236,19 +254,10 @@ def rm_pending_output_callback(output_callback, jobcachepending,
236254
builder.pathmapper = self.makePathMapper(reffiles, builder.stagedir, **kwargs)
237255
builder.requirements = j.requirements
238256

239-
# map files to assigned path inside a container. We need to also explicitly
240-
# walk over input as implicit reassignment doesn't reach everything in builder.bindings
241-
def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any]
242-
f["path"] = builder.pathmapper.mapper(f["location"])[1]
243-
f["dirname"], f["basename"] = os.path.split(f["path"])
244-
if f["class"] == "File":
245-
f["nameroot"], f["nameext"] = os.path.splitext(f["basename"])
246-
if not ACCEPTLIST_RE.match(f["basename"]):
247-
raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"]))
248-
return f
249-
250257
_logger.debug(u"[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4))
251258

259+
_check_adjust = partial(check_adjust, builder)
260+
252261
adjustFileObjs(builder.files, _check_adjust)
253262
adjustFileObjs(builder.bindings, _check_adjust)
254263
adjustDirObjs(builder.files, _check_adjust)
@@ -260,27 +269,51 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any]
260269

261270
_logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))
262271

263-
dockerReq, _ = self.get_requirement("DockerRequirement")
272+
dockerReq = self.get_requirement("DockerRequirement")[0]
264273
if dockerReq and kwargs.get("use_container"):
265274
out_prefix = kwargs.get("tmp_outdir_prefix")
266275
j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
267276
tmpdir_prefix = kwargs.get('tmpdir_prefix')
268277
j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
269-
j.stagedir = None
278+
j.stagedir = tempfile.mkdtemp(prefix=tmpdir_prefix)
270279
else:
271280
j.outdir = builder.outdir
272281
j.tmpdir = builder.tmpdir
273282
j.stagedir = builder.stagedir
274283

275284
initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0]
276-
j.generatefiles = {"class": "Directory", "listing": []}
285+
j.generatefiles = {"class": "Directory", "listing": [], "basename": ""}
277286
if initialWorkdir:
287+
ls = [] # type: List[Dict[str, Any]]
278288
if isinstance(initialWorkdir["listing"], (str, unicode)):
279-
j.generatefiles["listing"] = builder.do_eval(initialWorkdir["listing"])
289+
ls = builder.do_eval(initialWorkdir["listing"])
280290
else:
281291
for t in initialWorkdir["listing"]:
282-
j.generatefiles["listing"].append({"entryname": builder.do_eval(t["entryname"]),
283-
"entry": copy.deepcopy(builder.do_eval(t["entry"]))})
292+
if "entry" in t:
293+
et = {"entry": builder.do_eval(t["entry"])}
294+
if "entryname" in t:
295+
et["entryname"] = builder.do_eval(t["entryname"])
296+
else:
297+
et["entryname"] = None
298+
ls.append(et)
299+
else:
300+
ls.append(t)
301+
for i,t in enumerate(ls):
302+
if "entry" in t:
303+
if isinstance(t["entry"], (str, unicode)):
304+
ls[i] = {
305+
"class": "File",
306+
"basename": t["entryname"],
307+
"contents": t["entry"]
308+
}
309+
else:
310+
if t["entryname"]:
311+
t = copy.deepcopy(t)
312+
t["entry"]["basename"] = t["entryname"]
313+
ls[i] = t["entry"]
314+
j.generatefiles[u"listing"] = ls
315+
316+
normalizeFilesDirs(j.generatefiles)
284317

285318
j.environment = {}
286319
evr = self.get_requirement("EnvVarRequirement")[0]
@@ -321,6 +354,8 @@ def collect_output_ports(self, ports, builder, outdir):
321354
# https://github.com/python/mypy/issues/797
322355
partial(revmap_file, builder, outdir)))
323356
adjustFileObjs(ret, remove_path)
357+
adjustDirObjs(ret, remove_path)
358+
normalizeFilesDirs(ret)
324359
validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
325360
return ret
326361

@@ -334,6 +369,7 @@ def collect_output_ports(self, ports, builder, outdir):
334369
if ret:
335370
adjustFileObjs(ret, remove_path)
336371
adjustDirObjs(ret, remove_path)
372+
normalizeFilesDirs(ret)
337373
validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
338374
return ret if ret is not None else {}
339375
except validate.ValidationException as e:

cwltool/job.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,12 @@ def __init__(self): # type: () -> None
5858
self.outdir = None # type: str
5959
self.tmpdir = None # type: str
6060
self.environment = None # type: Dict[str,str]
61-
self.generatefiles = None # type: Dict[str,Union[Dict[str,str],str]]
61+
self.generatefiles = None # type: Dict[unicode, Union[List[Dict[str, str]], Dict[str,str], str]]
62+
self.stagedir = None # type: unicode
6263

6364
def run(self, dry_run=False, pull_image=True, rm_container=True,
6465
rm_tmpdir=True, move_outputs="move", **kwargs):
65-
# type: (bool, bool, bool, bool, bool, **Any) -> Union[Tuple[str,Dict[None,None]],None]
66+
# type: (bool, bool, bool, bool, bool, unicode, **Any) -> Union[Tuple[str,Dict[None,None]],None]
6667
if not os.path.exists(self.outdir):
6768
os.makedirs(self.outdir)
6869

@@ -78,10 +79,12 @@ def run(self, dry_run=False, pull_image=True, rm_container=True,
7879

7980
(docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
8081

81-
for f in self.pathmapper.files():
82-
p = self.pathmapper.mapper(f)
82+
for knownfile in self.pathmapper.files():
83+
p = self.pathmapper.mapper(knownfile)
8384
if p.type == "File" and not os.path.isfile(p[0]):
84-
raise WorkflowException(u"Input file %s (at %s) not found or is not a regular file." % (f, self.pathmapper.mapper(f)[0]))
85+
raise WorkflowException(
86+
u"Input file %s (at %s) not found or is not a regular file."
87+
% (knownfile, self.pathmapper.mapper(knownfile)[0]))
8588

8689
img_id = None
8790
if docker_req and kwargs.get("use_container") is not False:
@@ -97,6 +100,11 @@ def run(self, dry_run=False, pull_image=True, rm_container=True,
97100
vol = self.pathmapper.mapper(src)
98101
if vol.type == "File":
99102
runtime.append(u"--volume=%s:%s:ro" % (vol.resolved, vol.target))
103+
if vol.type == "CreateFile":
104+
createtmp = os.path.join(self.stagedir, os.path.basename(vol.target))
105+
with open(createtmp, "w") as f:
106+
f.write(vol.resolved.encode("utf-8"))
107+
runtime.append(u"--volume=%s:%s:ro" % (createtmp, vol.target))
100108
runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl"))
101109
runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp"))
102110
runtime.append(u"--workdir=%s" % ("/var/spool/cwl"))

cwltool/load_tool.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import urlparse
99
from schema_salad.ref_resolver import Loader
1010
import schema_salad.validate as validate
11+
from schema_salad.validate import ValidationException
1112
import schema_salad.schema as schema
1213
from avro.schema import Names
1314
from . import update
@@ -37,13 +38,12 @@ def fetch_document(argsworkflow):
3738
workflowobj = argsworkflow
3839
uri = "#" + str(id(argsworkflow))
3940
else:
40-
raise validate.ValidationException(
41-
"Must be URI or object: '%s'" % argsworkflow)
41+
raise ValidationException("Must be URI or object: '%s'" % argsworkflow)
4242

4343
return document_loader, workflowobj, uri
4444

4545
def _convert_stdstreams_to_files(workflowobj):
46-
# type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]) -> None
46+
# type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]]) -> None
4747

4848
if isinstance(workflowobj, dict):
4949
if ('class' in workflowobj
@@ -53,7 +53,7 @@ def _convert_stdstreams_to_files(workflowobj):
5353
for streamtype in ['stdout', 'stderr']:
5454
if out['type'] == streamtype:
5555
if 'outputBinding' in out:
56-
raise validate.ValidateException(
56+
raise ValidationException(
5757
"Not allowed to specify outputBinding when"
5858
" using %s shortcut." % streamtype)
5959
if streamtype in workflowobj:
@@ -109,12 +109,11 @@ def validate_document(document_loader, workflowobj, uri,
109109
workflowobj["id"] = fileuri
110110
processobj, metadata = document_loader.resolve_all(workflowobj, fileuri)
111111
if not isinstance(processobj, (dict, list)):
112-
raise validate.ValidationException("Workflow must be a dict or list.")
112+
raise ValidationException("Workflow must be a dict or list.")
113113

114114
if not metadata:
115115
if not isinstance(processobj, dict):
116-
raise validate.ValidationException(
117-
"Draft-2 workflows must be a dict.")
116+
raise ValidationException("Draft-2 workflows must be a dict.")
118117
metadata = {"$namespaces": processobj.get("$namespaces", {}),
119118
"$schemas": processobj.get("$schemas", []),
120119
"cwlVersion": processobj["cwlVersion"]}

0 commit comments

Comments
 (0)