# Patch overview (free-text preamble placed ahead of the first "diff --git" header).
#
# Files touched: cwltool/pack.py, tests/test_pack.py, and three new fixtures
# (tests/wf/count-lines1-wf.cwl, tests/wf/parseInt-tool.cwl, tests/wf/wc-tool.cwl).
#
# Intent, per the in-patch comments and the added tests: packing a document
# that is already packed should produce exactly the same document.
#   * replace_refs: when the remainder of an id after `stem` already starts
#     with newstem.strip("#"), the id is rewritten to "#" + remainder instead
#     of having `newstem` prepended a second time.
#   * rewrite_id: gains `document_packed=False`; when it is true, ids under
#     `mainuri` are mapped to their trailing "#..." fragment, taken with the
#     regex "#[^/]*$".
#   * `is_document_packed` is true when every entry of `sortedids` starts
#     with `uri`.
#   * A resolved reference that is a CommentedSeq is replaced by its first
#     element before the existing dict check.
#   * `rewrite[r]` becomes `rewrite.get(r, r + "#main")`.
#   * Tests pack tests/wf/hello_single_tool.cwl and tests/wf/count-lines1-wf.cwl,
#     pack the result again, and assert the two outputs are equal.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
#   and re-wrapped; hunk bodies no longer sit on separate lines, so it
#   presumably will not apply as stored -- confirm against the original commit.
# NOTE(review): in _pack_idempotently the second validate_document() call is
#   given `uri`, while the surrounding lines bind and use `uri2` -- confirm
#   which one is intended.
# NOTE(review): the generator in `is_document_packed` names its loop variable
#   `id`, shadowing the builtin inside that expression.
# NOTE(review): newstem.strip("#") removes "#" from both ends; if only the
#   leading "#" is meant, lstrip("#") would say so -- confirm.
# NOTE(review): `dcr[0]` has no visible guard for an empty CommentedSeq -- TODO
#   confirm resolve_ref cannot return one.
# NOTE(review): the `r + "#main"` fallback replaces a lookup that would have
#   raised KeyError -- confirm the fallback id is the desired one.
diff --git a/cwltool/pack.py b/cwltool/pack.py index 898f5485b..aab64e3ac 100644 --- a/cwltool/pack.py +++ b/cwltool/pack.py @@ -1,9 +1,11 @@ from __future__ import absolute_import import copy +import re from typing import Any, Callable, Dict, List, Set, Text, Union, cast from schema_salad.ref_resolver import Loader from six.moves import urllib +from ruamel.yaml.comments import CommentedSeq, CommentedMap from .process import shortname, uniquename import six @@ -64,7 +66,12 @@ def replace_refs(d, rewrite, stem, newstem): if v in rewrite: d[s] = rewrite[v] elif v.startswith(stem): - d[s] = newstem + v[len(stem):] + id_ = v[len(stem):] + # prevent appending newstems if tool is already packed + if id_.startswith(newstem.strip("#")): + d[s] = "#" + id_ + else: + d[s] = newstem + id_ replace_refs(v, rewrite, stem, newstem) def import_embed(d, seen): @@ -106,12 +113,16 @@ def loadref(b, u): mainpath, _ = urllib.parse.urldefrag(uri) - def rewrite_id(r, mainuri): - # type: (Text, Text) -> None + def rewrite_id(r, mainuri, document_packed=False): + # type: (Text, Text, bool) -> None if r == mainuri: rewrite[r] = "#main" elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"): - pass + if document_packed: + # rewrite tool and mainuri ids in a packed document + tool_id = re.search("#[^/]*$", r) + if tool_id: + rewrite[r] = tool_id.group() else: path, frag = urllib.parse.urldefrag(r) if path == mainpath: @@ -122,9 +133,10 @@ def rewrite_id(r, mainuri): sortedids = sorted(ids) + is_document_packed = all(id.startswith(uri) for id in sortedids) for r in sortedids: if r in document_loader.idx: - rewrite_id(r, uri) + rewrite_id(r, uri, is_document_packed) packed = {"$graph": [], "cwlVersion": metadata["cwlVersion"] } # type: Dict[Text, Any] @@ -132,6 +144,9 @@ def rewrite_id(r, mainuri): schemas = set() # type: Set[Text] for r in sorted(runs): dcr, metadata = document_loader.resolve_ref(r) + if isinstance(dcr, CommentedSeq): + dcr = dcr[0] + dcr = cast(CommentedMap, dcr) if not 
isinstance(dcr, dict): continue for doc in (dcr, metadata): @@ -141,7 +156,7 @@ def rewrite_id(r, mainuri): if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"): continue dc = cast(Dict[Text, Any], copy.deepcopy(dcr)) - v = rewrite[r] + v = rewrite.get(r, r + "#main") dc["id"] = v for n in ("name", "cwlVersion", "$namespaces", "$schemas"): if n in dc: diff --git a/tests/test_pack.py b/tests/test_pack.py index 34a14e6d3..ebddd79eb 100644 --- a/tests/test_pack.py +++ b/tests/test_pack.py @@ -60,3 +60,32 @@ def test_pack_missing_cwlVersion(self): packed = json.loads(print_pack(document_loader, processobj, uri, metadata)) self.assertEqual('v1.0', packed["cwlVersion"]) + + def test_pack_idempotence_tool(self): + """Test to ensure that pack produces exactly the same document for + an already packed document""" + + # Testing single tool + self._pack_idempotently("tests/wf/hello_single_tool.cwl") + + def test_pack_idempotence_workflow(self): + """Test to ensure that pack produces exactly the same document for + an already packed document""" + + # Testing workflow + self._pack_idempotently("tests/wf/count-lines1-wf.cwl") + + def _pack_idempotently(self, document): + self.maxDiff = None + document_loader, workflowobj, uri = fetch_document( + get_data(document)) + document_loader, avsc_names, processobj, metadata, uri = validate_document( + document_loader, workflowobj, uri) + # generate pack output dict + packed = json.loads(print_pack(document_loader, processobj, uri, metadata)) + + document_loader, workflowobj, uri2 = fetch_document(packed) + document_loader, avsc_names, processobj, metadata, uri2 = validate_document( + document_loader, workflowobj, uri) + double_packed = json.loads(print_pack(document_loader, processobj, uri2, metadata)) + self.assertEqual(packed, double_packed) diff --git a/tests/wf/count-lines1-wf.cwl b/tests/wf/count-lines1-wf.cwl new file mode 100644 index 000000000..77cbf3aa0 --- /dev/null +++ b/tests/wf/count-lines1-wf.cwl @@ 
-0,0 +1,25 @@ +#!/usr/bin/env cwl-runner +class: Workflow +cwlVersion: v1.0 + +inputs: + file1: + type: File + +outputs: + count_output: + type: int + outputSource: step2/output + +steps: + step1: + run: wc-tool.cwl + in: + file1: file1 + out: [output] + + step2: + run: parseInt-tool.cwl + in: + file1: step1/output + out: [output] diff --git a/tests/wf/parseInt-tool.cwl b/tests/wf/parseInt-tool.cwl new file mode 100644 index 000000000..42f166bde --- /dev/null +++ b/tests/wf/parseInt-tool.cwl @@ -0,0 +1,16 @@ +#!/usr/bin/env cwl-runner + +class: ExpressionTool +requirements: + - class: InlineJavascriptRequirement +cwlVersion: v1.0 + +inputs: + file1: + type: File + inputBinding: { loadContents: true } + +outputs: + output: int + +expression: "$({'output': parseInt(inputs.file1.contents)})" diff --git a/tests/wf/wc-tool.cwl b/tests/wf/wc-tool.cwl new file mode 100644 index 000000000..165585494 --- /dev/null +++ b/tests/wf/wc-tool.cwl @@ -0,0 +1,17 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool +cwlVersion: v1.0 + +inputs: + file1: File + +outputs: + output: + type: File + outputBinding: { glob: output } + +baseCommand: [wc, -l] + +stdin: $(inputs.file1.path) +stdout: output