1
+ """Loads a CWL document."""
1
2
from __future__ import absolute_import
2
3
# pylint: disable=unused-import
3
- """Loads a CWL document."""
4
4
5
5
import logging
6
6
import os
9
9
import hashlib
10
10
import json
11
11
import copy
12
- from typing import Any , Callable , Dict , List , Text , Tuple , Union , cast , Iterable
12
+ from typing import (Any , Callable , Dict , Iterable , List , Mapping , Optional ,
13
+ Text , Tuple , Union , cast )
13
14
14
15
import requests .sessions
15
16
from six import itervalues , string_types
17
+ from six .moves import urllib
16
18
17
19
import schema_salad .schema as schema
18
20
from avro .schema import Names
19
21
from ruamel .yaml .comments import CommentedMap , CommentedSeq
20
- from schema_salad .ref_resolver import Fetcher , Loader , file_uri
22
+ from schema_salad .ref_resolver import ContextType , Fetcher , Loader , file_uri
21
23
from schema_salad .sourceline import cmap
22
24
from schema_salad .validate import ValidationException
23
- from six .moves import urllib
24
25
25
26
from . import process , update
26
27
from .errors import WorkflowException
27
28
from .process import Process , shortname , get_schema
28
29
from .update import ALLUPDATES
29
30
30
31
_logger = logging .getLogger ("cwltool" )
31
-
32
32
jobloaderctx = {
33
33
u"cwl" : "https://w3id.org/cwl/cwl#" ,
34
34
u"cwltool" : "http://commonwl.org/cwltool#" ,
35
35
u"path" : {u"@type" : u"@id" },
36
36
u"location" : {u"@type" : u"@id" },
37
37
u"format" : {u"@type" : u"@id" },
38
38
u"id" : u"@id"
39
- }
39
+ } # type: ContextType
40
40
41
41
42
42
overrides_ctx = {
51
51
"@id" : "cwltool:override" ,
52
52
"mapSubject" : "class"
53
53
}
54
- } # type: Dict[Text, Union[Dict[Any, Any], Text, Iterable[Text]]]
54
+ } # type: ContextType
55
+
56
+
57
+ FetcherConstructorType = Callable [[Dict [Text , Union [Text , bool ]],
58
+ requests .sessions .Session ], Fetcher ]
59
+
60
+ loaders = {} # type: Dict[FetcherConstructorType, Loader]
61
+
62
def default_loader(fetcher_constructor):
    # type: (Optional[FetcherConstructorType]) -> Loader
    """Return the shared Loader associated with ``fetcher_constructor``.

    Loaders are memoised in the module-level ``loaders`` dict, keyed by the
    fetcher constructor itself (which, per the type comment, may be None).
    The first request for a given key builds a ``Loader`` from
    ``jobloaderctx`` and stores it; later requests return that same object.
    """
    # Populate the cache on first use, then always answer from the cache.
    if fetcher_constructor not in loaders:
        loaders[fetcher_constructor] = Loader(
            jobloaderctx, fetcher_constructor=fetcher_constructor)
    return loaders[fetcher_constructor]
55
70
56
71
def resolve_tool_uri (argsworkflow , # type: Text
57
72
resolver = None , # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
58
- fetcher_constructor = None ,
59
- # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
73
+ fetcher_constructor = None , # type: FetcherConstructorType
60
74
document_loader = None # type: Loader
61
- ):
62
- # type: (...) -> Tuple[Text, Text]
75
+ ): # type: (...) -> Tuple[Text, Text]
63
76
64
77
uri = None # type: Text
65
78
split = urllib .parse .urlsplit (argsworkflow )
66
79
# On Windows, urlsplit misreads a drive letter (e.g. "C:") as a URL scheme, so only treat known schemes as URIs here
67
- if split .scheme and split .scheme in [u'http' ,u'https' ,u'file' ]:
80
+ if split .scheme and split .scheme in [u'http' , u'https' , u'file' ]:
68
81
uri = argsworkflow
69
82
elif os .path .exists (os .path .abspath (argsworkflow )):
70
83
uri = file_uri (str (os .path .abspath (argsworkflow )))
71
84
elif resolver :
72
85
if document_loader is None :
73
- document_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
86
+ document_loader = default_loader ( fetcher_constructor ) # type: ignore
74
87
uri = resolver (document_loader , argsworkflow )
75
88
76
89
if uri is None :
@@ -85,18 +98,17 @@ def resolve_tool_uri(argsworkflow, # type: Text
85
98
86
99
def fetch_document (argsworkflow , # type: Union[Text, Dict[Text, Any]]
87
100
resolver = None , # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
88
- fetcher_constructor = None
89
- # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
90
- ):
91
- # type: (...) -> Tuple[Loader, CommentedMap, Text]
101
+ fetcher_constructor = None # type: FetcherConstructorType
102
+ ): # type: (...) -> Tuple[Loader, CommentedMap, Text]
92
103
"""Retrieve a CWL document."""
93
104
94
- document_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
105
+ document_loader = default_loader ( fetcher_constructor ) # type: ignore
95
106
96
107
uri = None # type: Text
97
108
workflowobj = None # type: CommentedMap
98
109
if isinstance (argsworkflow , string_types ):
99
- uri , fileuri = resolve_tool_uri (argsworkflow , resolver = resolver , document_loader = document_loader )
110
+ uri , fileuri = resolve_tool_uri (argsworkflow , resolver = resolver ,
111
+ document_loader = document_loader )
100
112
workflowobj = document_loader .fetch (fileuri )
101
113
elif isinstance (argsworkflow , dict ):
102
114
uri = "#" + Text (id (argsworkflow ))
@@ -126,7 +138,7 @@ def _convert_stdstreams_to_files(workflowobj):
126
138
sort_keys = True ).encode ('utf-8' )).hexdigest ())
127
139
workflowobj [streamtype ] = filename
128
140
out ['type' ] = 'File'
129
- out ['outputBinding' ] = {'glob' : filename }
141
+ out ['outputBinding' ] = cmap ( {'glob' : filename })
130
142
for inp in workflowobj .get ('inputs' , []):
131
143
if inp .get ('type' ) == 'stdin' :
132
144
if 'inputBinding' in inp :
@@ -170,25 +182,25 @@ def validate_document(document_loader, # type: Loader
170
182
enable_dev = False , # type: bool
171
183
strict = True , # type: bool
172
184
preprocess_only = False , # type: bool
173
- fetcher_constructor = None ,
174
- skip_schemas = None ,
175
- # type: Callable[[ Dict[Text, Text], requests.sessions.Session], Fetcher ]
176
- overrides = None # type: List [Dict]
185
+ fetcher_constructor = None , # type: FetcherConstructorType
186
+ skip_schemas = None , # type: bool
187
+ overrides = None , # type: List[ Dict]
188
+ metadata = None , # type: Optional [Dict]
177
189
):
178
190
# type: (...) -> Tuple[Loader, Names, Union[Dict[Text, Any], List[Dict[Text, Any]]], Dict[Text, Any], Text]
179
191
"""Validate a CWL document."""
180
192
181
193
if isinstance (workflowobj , list ):
182
- workflowobj = {
194
+ workflowobj = cmap ( {
183
195
"$graph" : workflowobj
184
- }
196
+ }, fn = uri )
185
197
186
198
if not isinstance (workflowobj , dict ):
187
199
raise ValueError ("workflowjobj must be a dict, got '%s': %s" % (type (workflowobj ), workflowobj ))
188
200
189
201
jobobj = None
190
202
if "cwl:tool" in workflowobj :
191
- job_loader = Loader ( jobloaderctx , fetcher_constructor = fetcher_constructor ) # type: ignore
203
+ job_loader = default_loader ( fetcher_constructor ) # type: ignore
192
204
jobobj , _ = job_loader .resolve_all (workflowobj , uri )
193
205
uri = urllib .parse .urljoin (uri , workflowobj ["https://w3id.org/cwl/cwl#tool" ])
194
206
del cast (dict , jobobj )["https://w3id.org/cwl/cwl#tool" ]
@@ -200,22 +212,25 @@ def validate_document(document_loader, # type: Loader
200
212
workflowobj = fetch_document (uri , fetcher_constructor = fetcher_constructor )[1 ]
201
213
202
214
fileuri = urllib .parse .urldefrag (uri )[0 ]
203
-
204
- if "cwlVersion" in workflowobj :
205
- if not isinstance (workflowobj ["cwlVersion" ], (str , Text )):
206
- raise Exception ("'cwlVersion' must be a string, got %s" % type (workflowobj ["cwlVersion" ]))
207
- # strip out version
208
- workflowobj ["cwlVersion" ] = re .sub (
209
- r"^(?:cwl:|https://w3id.org/cwl/cwl#)" , "" ,
210
- workflowobj ["cwlVersion" ])
211
- if workflowobj ["cwlVersion" ] not in list (ALLUPDATES ):
212
- # print out all the Supported Versions of cwlVersion
213
- versions = list (ALLUPDATES ) # ALLUPDATES is a dict
214
- versions .sort ()
215
- raise ValidationException ("'cwlVersion' not valid. Supported CWL versions are: \n {}" .format ("\n " .join (versions )))
216
- else :
217
- raise ValidationException ("No cwlVersion found."
218
- "Use the following syntax in your CWL workflow to declare version: cwlVersion: <version>" )
215
+ if "cwlVersion" not in workflowobj :
216
+ if metadata and 'cwlVersion' in metadata :
217
+ workflowobj ['cwlVersion' ] = metadata ['cwlVersion' ]
218
+ else :
219
+ raise ValidationException ("No cwlVersion found."
220
+ "Use the following syntax in your CWL document to declare "
221
+ "the version: cwlVersion: <version>" )
222
+
223
+ if not isinstance (workflowobj ["cwlVersion" ], (str , Text )):
224
+ raise Exception ("'cwlVersion' must be a string, got %s" % type (workflowobj ["cwlVersion" ]))
225
+ # strip out version
226
+ workflowobj ["cwlVersion" ] = re .sub (
227
+ r"^(?:cwl:|https://w3id.org/cwl/cwl#)" , "" ,
228
+ workflowobj ["cwlVersion" ])
229
+ if workflowobj ["cwlVersion" ] not in list (ALLUPDATES ):
230
+ # print out all the Supported Versions of cwlVersion
231
+ versions = list (ALLUPDATES ) # ALLUPDATES is a dict
232
+ versions .sort ()
233
+ raise ValidationException ("'cwlVersion' not valid. Supported CWL versions are: \n {}" .format ("\n " .join (versions )))
219
234
220
235
if workflowobj ["cwlVersion" ] == "draft-2" :
221
236
workflowobj = cast (CommentedMap , cmap (update ._draft2toDraft3dev1 (
@@ -238,36 +253,36 @@ def validate_document(document_loader, # type: Loader
238
253
_add_blank_ids (workflowobj )
239
254
240
255
workflowobj ["id" ] = fileuri
241
- processobj , metadata = document_loader .resolve_all (workflowobj , fileuri )
256
+ processobj , new_metadata = document_loader .resolve_all (workflowobj , fileuri )
242
257
if not isinstance (processobj , (CommentedMap , CommentedSeq )):
243
258
raise ValidationException ("Workflow must be a dict or list." )
244
259
245
- if not metadata :
260
+ if not new_metadata :
246
261
if not isinstance (processobj , dict ):
247
262
raise ValidationException ("Draft-2 workflows must be a dict." )
248
- metadata = cast (CommentedMap , cmap ({ "$namespaces" : processobj . get ( "$namespaces" , {}),
249
- "$schemas " : processobj .get ("$schemas " , [] ),
250
- "cwlVersion " : processobj [ "cwlVersion" ]} ,
251
- fn = fileuri ))
263
+ new_metadata = cast (CommentedMap , cmap (
264
+ { "$namespaces " : processobj .get ("$namespaces " , {} ),
265
+ "$schemas " : processobj . get ( "$schemas" , []) ,
266
+ "cwlVersion" : processobj [ "cwlVersion" ]}, fn = fileuri ))
252
267
253
268
_convert_stdstreams_to_files (workflowobj )
254
269
255
270
if preprocess_only :
256
- return document_loader , avsc_names , processobj , metadata , uri
271
+ return document_loader , avsc_names , processobj , new_metadata , uri
257
272
258
273
schema .validate_doc (avsc_names , processobj , document_loader , strict )
259
274
260
- if metadata .get ("cwlVersion" ) != update .LATEST :
275
+ if new_metadata .get ("cwlVersion" ) != update .LATEST :
261
276
processobj = cast (CommentedMap , cmap (update .update (
262
- processobj , document_loader , fileuri , enable_dev , metadata )))
277
+ processobj , document_loader , fileuri , enable_dev , new_metadata )))
263
278
264
279
if jobobj :
265
- metadata [u"cwl:defaults" ] = jobobj
280
+ new_metadata [u"cwl:defaults" ] = jobobj
266
281
267
282
if overrides :
268
- metadata [u"cwltool:overrides" ] = overrides
283
+ new_metadata [u"cwltool:overrides" ] = overrides
269
284
270
- return document_loader , avsc_names , processobj , metadata , uri
285
+ return document_loader , avsc_names , processobj , new_metadata , uri
271
286
272
287
273
288
def make_tool (document_loader , # type: Loader
@@ -322,7 +337,7 @@ def load_tool(argsworkflow, # type: Union[Text, Dict[Text, Any]]
322
337
enable_dev = False , # type: bool
323
338
strict = True , # type: bool
324
339
resolver = None , # type: Callable[[Loader, Union[Text, Dict[Text, Any]]], Text]
325
- fetcher_constructor = None , # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher]
340
+ fetcher_constructor = None , # type: FetcherConstructorType
326
341
overrides = None
327
342
):
328
343
# type: (...) -> Process
@@ -332,7 +347,8 @@ def load_tool(argsworkflow, # type: Union[Text, Dict[Text, Any]]
332
347
document_loader , avsc_names , processobj , metadata , uri = validate_document (
333
348
document_loader , workflowobj , uri , enable_dev = enable_dev ,
334
349
strict = strict , fetcher_constructor = fetcher_constructor ,
335
- overrides = overrides )
350
+ overrides = overrides , metadata = kwargs .get ('metadata' , None )
351
+ if kwargs else None )
336
352
return make_tool (document_loader , avsc_names , metadata , uri ,
337
353
makeTool , kwargs if kwargs else {})
338
354
0 commit comments