From 4c2a088de8c2d6325ac8c03351b961f53cf19bbc Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 5 Sep 2022 15:14:33 +1200 Subject: [PATCH 1/9] Show cwltool --make-template to users in the chapter where we introduce the inputs objects --- src/topics/inputs.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/topics/inputs.md b/src/topics/inputs.md index 0ab20d05..123c60aa 100644 --- a/src/topics/inputs.md +++ b/src/topics/inputs.md @@ -29,6 +29,26 @@ Create a file called `inp-job.yml`: :name: inp-job.yml ``` +````{note} +You can use `cwltool` to create a template input object. That saves you from having +to type all the input parameters in a input object file: + +```bash +$ cwltool --make-template inp.cwl +INFO /home/kinow/Development/python/workspace/cwltool/venv/bin/cwltool 3.1.20220621073108 +INFO Resolved 'inp.cwl' to 'file:///tmp/inp.cwl' +example_string: a_string # type "string" +example_int: 0 # type "int" +example_flag: false # type "boolean" +example_file: # type "File" (optional) + class: File + path: a/file/path +``` + +You can redirect the output to a file, i.e. `cwltool --make-template inp.cwl > inp-job.yml`, +and then modify the default values with your desired input values. +```` + Notice that "example_file", as a `File` type, must be provided as an object with the fields `class: File` and `path`. From ff29772ebbec474880ab1c7d4719f34ce90ff8ea Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Mon, 5 Sep 2022 18:02:49 +1200 Subject: [PATCH 2/9] Add a troubleshooting section for the cachedir docs --- src/topics/index.md | 1 + src/topics/troubleshooting.md | 151 ++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 src/topics/troubleshooting.md diff --git a/src/topics/index.md b/src/topics/index.md index 90920979..09ec4142 100644 --- a/src/topics/index.md +++ b/src/topics/index.md @@ -21,4 +21,5 @@ best-practices.md file-formats.md metadata-and-authorship.md specifying-software-requirements.md +troubleshooting.md ``` diff --git a/src/topics/troubleshooting.md b/src/topics/troubleshooting.md new file mode 100644 index 00000000..f1f5618e --- /dev/null +++ b/src/topics/troubleshooting.md @@ -0,0 +1,151 @@ +# Troubleshooting + +In this section you will find ways to troubleshoot when you have problems executing CWL, +specifically when using `cwltool` but some of these techniques may apply to different +CWL Runners as well. + +## Run `cwltool` with `cachedir` + +You can use the `--cachedir` option when running a workflow to tell `cwltool` to +cache intermediate files (files that are not input nor output files, but created +during runtime for the execution). By default, these files are created in the +temporary directory, but writing them to a separate directory makes it easier. + +The following example `troubleshooting-wf1.cwl` has a **typo in the second step**, +where instead of calling `touch` is it calling `ouch`. We enforce the execution +of `step_a`, followed by `step_b`. This means that the `step_a.txt` is produced +before the `step_b` fails to produce a file. + +```{code-block} cwl +:name: "`troubleshooting-wf1.cwl`" +:caption: "`troubleshooting-wf1.cwl`" +cwlVersion: v1.2 +class: Workflow + +inputs: [] +outputs: [] + +steps: + step_a: + run: + class: CommandLineTool + inputs: [] + outputs: + step_a_file: + type: File + outputBinding: + glob: 'step_a.txt' + arguments: ['touch', 'step_a.txt'] + in: [] + out: [step_a_file] + step_b: + run: + class: CommandLineTool + inputs: [] + outputs: [] + arguments: ['ouch', 'step_b.txt'] + # To force step_b to wait for step_a + in: + step_a_file: + source: step_a/step_a_file + out: [] +``` + +```{note} +The CWL Standard does not guarantee the execution order of Workflow Steps. They can +be executed in any arbitrary order, or in paralell. So the in the `troubleshooting-wf1.cwl` +CWL document we enforce the order by chaining the output of `step_a` into an input +of `step_b`. +``` + +Let's execute this workflow with `/tmp/cachedir/` as the `--cachedir` value (`cwltool` create the +directory for you if it does not already exist): + +```{code-block} console +:emphasize-lines: 12-14, 19-21 +$ cwltool --cachedir /tmp/cachedir/ troubleshoot_wf1.cwl +INFO /home/kinow/Development/python/workspace/user_guide/venv/bin/cwltool 3.1.20220830195442 +INFO Resolved 'troubleshoot_wf1.cwl' to 'file:///tmp/troubleshoot_wf1.cwl' +WARNING Workflow checker warning: +troubleshoot_wf1.cwl:28:7: 'step_a_file' is not an input parameter of ordereddict([('class', + 'CommandLineTool'), ('inputs', []), ('outputs', []), ('arguments', + ['ouch', 'step_b.txt']), ('id', + '_:af6cdc76-ead7-4438-94a2-f9f96b3d70c5')]), expected +INFO [workflow ] start +INFO [workflow ] starting step step_a +INFO [step step_a] start +INFO [job step_a] Output of job will be cached in /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b +INFO [job step_a] /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b$ touch \ + step_a.txt +INFO [job step_a] completed success +INFO [step step_a] completed success +INFO [workflow ] starting step step_b +INFO [step step_b] start +INFO [job step_b] Output of job will be cached in /tmp/cachedir/feec39505ecca29dce1a210f75b12283 +INFO [job step_b] /tmp/cachedir/feec39505ecca29dce1a210f75b12283$ ouch \ + step_b.txt +ERROR 'ouch' not found: [Errno 2] No such file or directory: 'ouch' +WARNING [job step_b] completed permanentFail +WARNING [step step_b] completed permanentFail +INFO [workflow ] completed permanentFail +{} +WARNING Final process status is permanentFail +``` + +The workflow is in the `permanentFail` status, but you can inspect the intermediate +files created: + +```{code-block} console +:emphasize-lines: 4 +$ tree /tmp/cachedir +/tmp/cachedir +├── 5504f8afaebc04b48f07e6e5f2b5237b +│ └── step_a.txt +├── 5504f8afaebc04b48f07e6e5f2b5237b.status +├── abz3v9aq +├── feec39505ecca29dce1a210f75b12283 +└── feec39505ecca29dce1a210f75b12283.status + +3 directories, 3 files +``` + +The `.status` files display the status of each step executed by the workflow. And +the `step_a.txt` is visible in the output. Note that `cwltool` shows what where +the workflow step outputs are being cached near “`Output of job will be cached (…)`”. + +The next time you execute the same command, `cwltool` will use the cached output +of the workflow steps. Before doing so, fix the typo so `step_b` now runs `touch`. +After fixing the typo, when you execute `cwltool` with the same arguments as the +previous time, note that `cwltool` output will contain information about pre-cached +outputs, and about a new cache entry for the output of `step_b`. + +```{code-block} console +:emphasize-lines: 12, 16-18 +$ cwltool --cachedir /tmp/cachedir/ troubleshoot_wf1.cwl +INFO /home/kinow/Development/python/workspace/user_guide/venv/bin/cwltool 3.1.20220830195442 +INFO Resolved 'troubleshoot_wf1.cwl' to 'file:///tmp/troubleshoot_wf1.cwl' +WARNING Workflow checker warning: +troubleshoot_wf1.cwl:28:7: 'step_a_file' is not an input parameter of ordereddict([('class', + 'CommandLineTool'), ('inputs', []), ('outputs', []), ('arguments', + ['touch', 'step_b.txt']), ('id', + '_:50e379f8-dce8-4794-9142-c53dc4e0e30d')]), expected +INFO [workflow ] start +INFO [workflow ] starting step step_a +INFO [step step_a] start +INFO [job step_a] Using cached output in /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b +INFO [step step_a] completed success +INFO [workflow ] starting step step_b +INFO [step step_b] start +INFO [job step_b] Output of job will be cached in /tmp/cachedir/822d8caf8894683f434ed8eb8be1b10d +INFO [job step_b] /tmp/cachedir/822d8caf8894683f434ed8eb8be1b10d$ touch \ + step_b.txt +INFO [job step_b] completed success +INFO [step step_b] completed success +INFO [workflow ] completed success +{} +INFO Final process status is success +``` + +In this example, the workflow step `step_a` was executed only once as its output was cached, +even though we executed `cwltool` twice. This can be useful for troubleshooting your CWL document, +while also avoiding recomputing steps. From dab45fefcc9220c03a8d507df6ba2a343a7ba443 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Tue, 6 Sep 2022 11:19:40 +1200 Subject: [PATCH 3/9] Proof-read cachedir docs --- src/topics/troubleshooting.md | 53 +++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/src/topics/troubleshooting.md b/src/topics/troubleshooting.md index f1f5618e..b322d0cd 100644 --- a/src/topics/troubleshooting.md +++ b/src/topics/troubleshooting.md @@ -1,20 +1,21 @@ # Troubleshooting -In this section you will find ways to troubleshoot when you have problems executing CWL, -specifically when using `cwltool` but some of these techniques may apply to different -CWL Runners as well. +In this section you will find ways to troubleshoot when you have problems executing CWL. +We focus on `cwltool` here but some of these techniques may apply to other CWL Runners. ## Run `cwltool` with `cachedir` You can use the `--cachedir` option when running a workflow to tell `cwltool` to cache intermediate files (files that are not input nor output files, but created -during runtime for the execution). By default, these files are created in the -temporary directory, but writing them to a separate directory makes it easier. +while your workflow is running). By default, these files are created in a +temporary directory but writing them to a separate directory makes accessing +them easier. -The following example `troubleshooting-wf1.cwl` has a **typo in the second step**, -where instead of calling `touch` is it calling `ouch`. We enforce the execution -of `step_a`, followed by `step_b`. This means that the `step_a.txt` is produced -before the `step_b` fails to produce a file. +In the following example `troubleshooting-wf1.cwl` we have two steps, `step_a` and `step_b`. +These two steps are executed in order (we enforce it, see note below). The first step, +`step_a`, executes the `touch` command. The second step, `step_b`, **has a typo**, +where instead of also executing the `touch` command it tries to execute `ouch`, which +fails. ```{code-block} cwl :name: "`troubleshooting-wf1.cwl`" @@ -58,8 +59,8 @@ CWL document we enforce the order by chaining the output of `step_a` into an inp of `step_b`. ``` -Let's execute this workflow with `/tmp/cachedir/` as the `--cachedir` value (`cwltool` create the -directory for you if it does not already exist): +Let's execute this workflow with `/tmp/cachedir/` as the `--cachedir` value (`cwltool` will +create the directory for you if it does not exist already): ```{code-block} console :emphasize-lines: 12-14, 19-21 @@ -92,8 +93,10 @@ INFO [workflow ] completed permanentFail WARNING Final process status is permanentFail ``` -The workflow is in the `permanentFail` status, but you can inspect the intermediate -files created: +The workflow is in the `permanentFail` status due to `step_b` failing to execute the +non-existent `ouch` command. The `step_a` was executed successfully and its output +has been cached in your `cachedir` location. You can inspect the intermediate files +created: ```{code-block} console :emphasize-lines: 4 @@ -109,15 +112,15 @@ $ tree /tmp/cachedir 3 directories, 3 files ``` -The `.status` files display the status of each step executed by the workflow. And -the `step_a.txt` is visible in the output. Note that `cwltool` shows what where -the workflow step outputs are being cached near “`Output of job will be cached (…)`”. +Each workflow step has received a unique ID (the long value that looks like a hash). +The `${HASH}.status` files display the status of each step executed by the workflow. +And the `step_a` output file `step_a.txt` is visible in the output of the command above. -The next time you execute the same command, `cwltool` will use the cached output -of the workflow steps. Before doing so, fix the typo so `step_b` now runs `touch`. -After fixing the typo, when you execute `cwltool` with the same arguments as the -previous time, note that `cwltool` output will contain information about pre-cached -outputs, and about a new cache entry for the output of `step_b`. +Now fix the typo so `step_b` executes `touch` (i.e. replace `ouch` by `touch` in the +`step_b`). After fixing the typo, when you execute `cwltool` with the same arguments +as the previous time, note that now `cwltool` output contains information about +pre-cached outputs for `step_a`, and about a new cache entry for the output of `step_b`. +Also note that the status of `step_b` is now of success. ```{code-block} console :emphasize-lines: 12, 16-18 @@ -146,6 +149,8 @@ INFO [workflow ] completed success INFO Final process status is success ``` -In this example, the workflow step `step_a` was executed only once as its output was cached, -even though we executed `cwltool` twice. This can be useful for troubleshooting your CWL document, -while also avoiding recomputing steps. +In this example the workflow step `step_a` was not re-evaluated as it had been cached, and +there was no change in its execution or output. Furthermore, `cwltool` was able to recognize +when it had to re-evaluate `step_b` after we fixed its executable name. This technique is +useful for troubleshooting your CWL documents and also as a way to prevent `cwltool` to +re-evaluate steps unnecessarily. From b6e4ef7a752be84c2564679b7b2b37653303dff8 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Wed, 21 Sep 2022 09:46:27 +1200 Subject: [PATCH 4/9] Use runcmd --- .../troubleshooting-wf1-stepb-fixed.cwl | 30 ++++++ .../troubleshooting/troubleshooting-wf1.cwl | 30 ++++++ src/topics/inputs.md | 12 +-- src/topics/troubleshooting.md | 102 ++---------------- 4 files changed, 70 insertions(+), 104 deletions(-) create mode 100644 src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl create mode 100644 src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl new file mode 100644 index 00000000..a11b2260 --- /dev/null +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl @@ -0,0 +1,30 @@ +cwlVersion: v1.2 +class: Workflow + +inputs: [] +outputs: [] + +steps: + step_a: + run: + class: CommandLineTool + inputs: [] + outputs: + step_a_file: + type: File + outputBinding: + glob: 'step_a.txt' + arguments: ['touch', 'step_a.txt'] + in: [] + out: [step_a_file] + step_b: + run: + class: CommandLineTool + inputs: [] + outputs: [] + arguments: ['touch', 'step_b.txt'] + # To force step_b to wait for step_a + in: + step_a_file: + source: step_a/step_a_file + out: [] diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl new file mode 100644 index 00000000..49f4f43d --- /dev/null +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl @@ -0,0 +1,30 @@ +cwlVersion: v1.2 +class: Workflow + +inputs: [] +outputs: [] + +steps: + step_a: + run: + class: CommandLineTool + inputs: [] + outputs: + step_a_file: + type: File + outputBinding: + glob: 'step_a.txt' + arguments: ['touch', 'step_a.txt'] + in: [] + out: [step_a_file] + step_b: + run: + class: CommandLineTool + inputs: [] + outputs: [] + arguments: ['ouch', 'step_b.txt'] + # To force step_b to wait for step_a + in: + step_a_file: + source: step_a/step_a_file + out: [] diff --git a/src/topics/inputs.md b/src/topics/inputs.md index 123c60aa..edfb0063 100644 --- a/src/topics/inputs.md +++ b/src/topics/inputs.md @@ -33,16 +33,8 @@ Create a file called `inp-job.yml`: You can use `cwltool` to create a template input object. That saves you from having to type all the input parameters in a input object file: -```bash -$ cwltool --make-template inp.cwl -INFO /home/kinow/Development/python/workspace/cwltool/venv/bin/cwltool 3.1.20220621073108 -INFO Resolved 'inp.cwl' to 'file:///tmp/inp.cwl' -example_string: a_string # type "string" -example_int: 0 # type "int" -example_flag: false # type "boolean" -example_file: # type "File" (optional) - class: File - path: a/file/path +```{runcmd} cwltool --make-template inp.cwl +:working-directory: src/_includes/cwl/inputs ``` You can redirect the output to a file, i.e. `cwltool --make-template inp.cwl > inp-job.yml`, diff --git a/src/topics/troubleshooting.md b/src/topics/troubleshooting.md index b322d0cd..bd9ab532 100644 --- a/src/topics/troubleshooting.md +++ b/src/topics/troubleshooting.md @@ -17,39 +17,11 @@ These two steps are executed in order (we enforce it, see note below). The first where instead of also executing the `touch` command it tries to execute `ouch`, which fails. -```{code-block} cwl +```{literalinclude} /_includes/cwl/troubleshooting/troubleshooting-wf1.cwl +:language: cwl :name: "`troubleshooting-wf1.cwl`" :caption: "`troubleshooting-wf1.cwl`" -cwlVersion: v1.2 -class: Workflow - -inputs: [] -outputs: [] - -steps: - step_a: - run: - class: CommandLineTool - inputs: [] - outputs: - step_a_file: - type: File - outputBinding: - glob: 'step_a.txt' - arguments: ['touch', 'step_a.txt'] - in: [] - out: [step_a_file] - step_b: - run: - class: CommandLineTool - inputs: [] - outputs: [] - arguments: ['ouch', 'step_b.txt'] - # To force step_b to wait for step_a - in: - step_a_file: - source: step_a/step_a_file - out: [] +:emphasize-lines: 25 ``` ```{note} @@ -62,35 +34,9 @@ of `step_b`. Let's execute this workflow with `/tmp/cachedir/` as the `--cachedir` value (`cwltool` will create the directory for you if it does not exist already): -```{code-block} console +```{runcmd} cwltool --cachedir /tmp/cachedir/ troubleshooting-wf1.cwl +:working-directory: src/_includes/cwl/troubleshooting :emphasize-lines: 12-14, 19-21 -$ cwltool --cachedir /tmp/cachedir/ troubleshoot_wf1.cwl -INFO /home/kinow/Development/python/workspace/user_guide/venv/bin/cwltool 3.1.20220830195442 -INFO Resolved 'troubleshoot_wf1.cwl' to 'file:///tmp/troubleshoot_wf1.cwl' -WARNING Workflow checker warning: -troubleshoot_wf1.cwl:28:7: 'step_a_file' is not an input parameter of ordereddict([('class', - 'CommandLineTool'), ('inputs', []), ('outputs', []), ('arguments', - ['ouch', 'step_b.txt']), ('id', - '_:af6cdc76-ead7-4438-94a2-f9f96b3d70c5')]), expected -INFO [workflow ] start -INFO [workflow ] starting step step_a -INFO [step step_a] start -INFO [job step_a] Output of job will be cached in /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b -INFO [job step_a] /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b$ touch \ - step_a.txt -INFO [job step_a] completed success -INFO [step step_a] completed success -INFO [workflow ] starting step step_b -INFO [step step_b] start -INFO [job step_b] Output of job will be cached in /tmp/cachedir/feec39505ecca29dce1a210f75b12283 -INFO [job step_b] /tmp/cachedir/feec39505ecca29dce1a210f75b12283$ ouch \ - step_b.txt -ERROR 'ouch' not found: [Errno 2] No such file or directory: 'ouch' -WARNING [job step_b] completed permanentFail -WARNING [step step_b] completed permanentFail -INFO [workflow ] completed permanentFail -{} -WARNING Final process status is permanentFail ``` The workflow is in the `permanentFail` status due to `step_b` failing to execute the @@ -98,18 +44,8 @@ non-existent `ouch` command. The `step_a` was executed successfully and its outp has been cached in your `cachedir` location. You can inspect the intermediate files created: -```{code-block} console +```{runcmd} tree /tmp/cachedir :emphasize-lines: 4 -$ tree /tmp/cachedir -/tmp/cachedir -├── 5504f8afaebc04b48f07e6e5f2b5237b -│ └── step_a.txt -├── 5504f8afaebc04b48f07e6e5f2b5237b.status -├── abz3v9aq -├── feec39505ecca29dce1a210f75b12283 -└── feec39505ecca29dce1a210f75b12283.status - -3 directories, 3 files ``` Each workflow step has received a unique ID (the long value that looks like a hash). @@ -122,31 +58,9 @@ as the previous time, note that now `cwltool` output contains information about pre-cached outputs for `step_a`, and about a new cache entry for the output of `step_b`. Also note that the status of `step_b` is now of success. -```{code-block} console +```{runcmd} cwltool --cachedir /tmp/cachedir/ troubleshooting-wf1-stepb-fixed.cwl +:working-directory: src/_includes/cwl/troubleshooting :emphasize-lines: 12, 16-18 -$ cwltool --cachedir /tmp/cachedir/ troubleshoot_wf1.cwl -INFO /home/kinow/Development/python/workspace/user_guide/venv/bin/cwltool 3.1.20220830195442 -INFO Resolved 'troubleshoot_wf1.cwl' to 'file:///tmp/troubleshoot_wf1.cwl' -WARNING Workflow checker warning: -troubleshoot_wf1.cwl:28:7: 'step_a_file' is not an input parameter of ordereddict([('class', - 'CommandLineTool'), ('inputs', []), ('outputs', []), ('arguments', - ['touch', 'step_b.txt']), ('id', - '_:50e379f8-dce8-4794-9142-c53dc4e0e30d')]), expected -INFO [workflow ] start -INFO [workflow ] starting step step_a -INFO [step step_a] start -INFO [job step_a] Using cached output in /tmp/cachedir/5504f8afaebc04b48f07e6e5f2b5237b -INFO [step step_a] completed success -INFO [workflow ] starting step step_b -INFO [step step_b] start -INFO [job step_b] Output of job will be cached in /tmp/cachedir/822d8caf8894683f434ed8eb8be1b10d -INFO [job step_b] /tmp/cachedir/822d8caf8894683f434ed8eb8be1b10d$ touch \ - step_b.txt -INFO [job step_b] completed success -INFO [step step_b] completed success -INFO [workflow ] completed success -{} -INFO Final process status is success ``` In this example the workflow step `step_a` was not re-evaluated as it had been cached, and From 1af334631522a14c7fef759d00b657ffde914c1f Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Sep 2022 18:07:19 +1200 Subject: [PATCH 5/9] Simplify the troubleshooting workflow. Ignore build directory. --- .gitignore | 1 + .../troubleshooting-wf1-stepb-fixed.cwl | 46 +++++++++++++------ .../troubleshooting/troubleshooting-wf1.cwl | 46 +++++++++++++------ src/topics/troubleshooting.md | 23 ++++------ 4 files changed, 71 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index dc47f1bc..6063055d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ _site .Rhistory .RData _build/ +build/ *.egg-info/ src/_includes/cwl/**/output.txt diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl index a11b2260..1d477d62 100644 --- a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl @@ -1,30 +1,46 @@ cwlVersion: v1.2 class: Workflow -inputs: [] -outputs: [] +inputs: + text: + type: string + default: 'Hello World' +outputs: + reversed_message: + type: string + outputSource: step_b/reversed_message steps: step_a: run: class: CommandLineTool - inputs: [] + stdout: stdout.txt + inputs: + text: string outputs: - step_a_file: + step_a_stdout: type: File outputBinding: - glob: 'step_a.txt' - arguments: ['touch', 'step_a.txt'] - in: [] - out: [step_a_file] + glob: 'stdout.txt' + arguments: ['echo', '-n', '$(inputs.text)'] + in: + text: text + out: [step_a_stdout] step_b: run: class: CommandLineTool - inputs: [] - outputs: [] - arguments: ['touch', 'step_b.txt'] - # To force step_b to wait for step_a + stdout: stdout.txt + inputs: + step_a_stdout: File + outputs: + reversed_message: + type: string + outputBinding: + glob: stdout.txt + loadContents: true + outputEval: $(self[0].contents) + arguments: ['rev', '$(inputs.step_a_stdout)'] in: - step_a_file: - source: step_a/step_a_file - out: [] + step_a_stdout: + source: step_a/step_a_stdout + out: [reversed_message] diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl index 49f4f43d..ff877605 100644 --- a/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl @@ -1,30 +1,46 @@ cwlVersion: v1.2 class: Workflow -inputs: [] -outputs: [] +inputs: + text: + type: string + default: 'Hello World' +outputs: + reversed_message: + type: string + outputSource: step_b/reversed_message steps: step_a: run: class: CommandLineTool - inputs: [] + stdout: stdout.txt + inputs: + text: string outputs: - step_a_file: + step_a_stdout: type: File outputBinding: - glob: 'step_a.txt' - arguments: ['touch', 'step_a.txt'] - in: [] - out: [step_a_file] + glob: 'stdout.txt' + arguments: ['echo', '-n', '$(inputs.text)'] + in: + text: text + out: [step_a_stdout] step_b: run: class: CommandLineTool - inputs: [] - outputs: [] - arguments: ['ouch', 'step_b.txt'] - # To force step_b to wait for step_a + stdout: stdout.txt + inputs: + step_a_stdout: File + outputs: + reversed_message: + type: string + outputBinding: + glob: stdout.txt + loadContents: true + outputEval: $(self[0].contents) + arguments: ['revv', '$(inputs.step_a_stdout)'] in: - step_a_file: - source: step_a/step_a_file - out: [] + step_a_stdout: + source: step_a/step_a_stdout + out: [reversed_message] diff --git a/src/topics/troubleshooting.md b/src/topics/troubleshooting.md index bd9ab532..7d7aae8d 100644 --- a/src/topics/troubleshooting.md +++ b/src/topics/troubleshooting.md @@ -12,23 +12,16 @@ temporary directory but writing them to a separate directory makes accessing them easier. In the following example `troubleshooting-wf1.cwl` we have two steps, `step_a` and `step_b`. -These two steps are executed in order (we enforce it, see note below). The first step, -`step_a`, executes the `touch` command. The second step, `step_b`, **has a typo**, -where instead of also executing the `touch` command it tries to execute `ouch`, which +The workflow is equivalent to `echo "Hello World" | rev`, which would print the message +"Hello World" reversed, i.e. "dlroW olleH". However, the second step, `step_b`, **has a typo**, +where instead of executing the `rev` command it tries to execute `revv`, which fails. ```{literalinclude} /_includes/cwl/troubleshooting/troubleshooting-wf1.cwl :language: cwl :name: "`troubleshooting-wf1.cwl`" :caption: "`troubleshooting-wf1.cwl`" -:emphasize-lines: 25 -``` - -```{note} -The CWL Standard does not guarantee the execution order of Workflow Steps. They can -be executed in any arbitrary order, or in paralell. So the in the `troubleshooting-wf1.cwl` -CWL document we enforce the order by chaining the output of `step_a` into an input -of `step_b`. +:emphasize-lines: 42 ``` Let's execute this workflow with `/tmp/cachedir/` as the `--cachedir` value (`cwltool` will @@ -40,7 +33,7 @@ create the directory for you if it does not exist already): ``` The workflow is in the `permanentFail` status due to `step_b` failing to execute the -non-existent `ouch` command. The `step_a` was executed successfully and its output +non-existent `revv` command. The `step_a` was executed successfully and its output has been cached in your `cachedir` location. You can inspect the intermediate files created: @@ -50,9 +43,9 @@ created: Each workflow step has received a unique ID (the long value that looks like a hash). The `${HASH}.status` files display the status of each step executed by the workflow. -And the `step_a` output file `step_a.txt` is visible in the output of the command above. +And the `step_a` output file `stdout.txt` is visible in the output of the command above. -Now fix the typo so `step_b` executes `touch` (i.e. replace `ouch` by `touch` in the +Now fix the typo so `step_b` executes `rev` (i.e. replace `revv` by `rev` in the `step_b`). After fixing the typo, when you execute `cwltool` with the same arguments as the previous time, note that now `cwltool` output contains information about pre-cached outputs for `step_a`, and about a new cache entry for the output of `step_b`. @@ -65,6 +58,6 @@ Also note that the status of `step_b` is now of success. In this example the workflow step `step_a` was not re-evaluated as it had been cached, and there was no change in its execution or output. Furthermore, `cwltool` was able to recognize -when it had to re-evaluate `step_b` after we fixed its executable name. This technique is +when it had to re-evaluate `step_b` after we fixed the executable name. This technique is useful for troubleshooting your CWL documents and also as a way to prevent `cwltool` to re-evaluate steps unnecessarily. From 12067f4d3a28aa2b7b58ee9b7a4772bb3d2152d4 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Sep 2022 18:13:30 +1200 Subject: [PATCH 6/9] Install the tree command line --- .github/workflows/gh-pages.yaml | 2 +- .readthedocs.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gh-pages.yaml b/.github/workflows/gh-pages.yaml index 9c0ebcce..fcec7837 100644 --- a/.github/workflows/gh-pages.yaml +++ b/.github/workflows/gh-pages.yaml @@ -17,7 +17,7 @@ jobs: - name: Install apt packages run: | - sudo apt-get install -y graphviz + sudo apt-get install -y graphviz tree - name: Set up Python uses: actions/setup-python@v4 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 70b76ed9..06a0000c 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,6 +9,7 @@ build: nodejs: "16" apt_packages: - graphviz + - tree sphinx: configuration: src/conf.py From 03f605293e076bcd857c505a77ba8f0089eaac7e Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" <1330696+mr-c@users.noreply.github.com> Date: Mon, 10 Oct 2022 11:02:03 +0200 Subject: [PATCH 7/9] use baseCommand --- .../cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl index 1d477d62..46e7d774 100644 --- a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl @@ -22,7 +22,8 @@ steps: type: File outputBinding: glob: 'stdout.txt' - arguments: ['echo', '-n', '$(inputs.text)'] + baseCommand: echo + arguments: [ '-n', '$(inputs.text)' ] in: text: text out: [step_a_stdout] From df03ff0ae32ff8ee533a0a5f99f70bc8724e107a Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" <1330696+mr-c@users.noreply.github.com> Date: Mon, 10 Oct 2022 11:02:12 +0200 Subject: [PATCH 8/9] use baseCommand --- src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl index ff877605..f2ca2a0e 100644 --- a/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1.cwl @@ -39,7 +39,8 @@ steps: glob: stdout.txt loadContents: true outputEval: $(self[0].contents) - arguments: ['revv', '$(inputs.step_a_stdout)'] + baseCommand: revv + arguments: [ $(inputs.step_a_stdout) ] in: step_a_stdout: source: step_a/step_a_stdout From 52eeec7fbdcfaed1b3638bab94e80955fb8fa818 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" <1330696+mr-c@users.noreply.github.com> Date: Mon, 10 Oct 2022 11:02:33 +0200 Subject: [PATCH 9/9] use baseCommand --- .../cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl index 46e7d774..90b7cb94 100644 --- a/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl +++ b/src/_includes/cwl/troubleshooting/troubleshooting-wf1-stepb-fixed.cwl @@ -40,7 +40,8 @@ steps: glob: stdout.txt loadContents: true outputEval: $(self[0].contents) - arguments: ['rev', '$(inputs.step_a_stdout)'] + baseCommand: rev + arguments: [ $(inputs.step_a_stdout) ] in: step_a_stdout: source: step_a/step_a_stdout