  # 'version': the version of the plugin,
  # 'cmd_name': the command we want to run,
  # 'input_name': the name of the input parameter of that command
+ # 'ignore_parameters': list of parameters to ignore, for example: threads
  # 'parent_artifact_name': name of the parent output, input for this command
  # 'parameters_names': list of the names of the parameter sets we want to run
  # }
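For reference, filling in this template with the Atropos step defined below gives a complete step entry; the only key added by this commit is 'ignore_parameters':

    {'previous-step': None,                  # first step, no parent
     'plugin': 'qp-shogun',                  # plugin providing the command
     'version': '012020',                    # plugin version
     'cmd_name': 'Atropos v1.1.24',          # command to run
     'input_name': 'input',                  # the command's input parameter
     'ignore_parameters': ['Number of threads used'],
     'parent_artifact_name': None,           # no parent output
     'parameters_names': ['KAPA HyperPlus with iTru']}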
@@ -41,21 +42,24 @@ full_pipelines = [
      'data_type': ['Metagenomic'],
      'artifact_type': 'per_sample_FASTQ',
      'previous-step': None,
+     'requirements': dict(),
      'steps': [
          {'previous-step': None,
           'plugin': 'qp-shogun',
           'version': '012020',
           'cmd_name': 'Atropos v1.1.24',
           'input_name': 'input',
+          'ignore_parameters': ['Number of threads used'],
           'parent_artifact_name': None,
           'parameters_names': ['KAPA HyperPlus with iTru']},
          {'previous-step': 'Atropos v1.1.24',
           'plugin': 'qp-shogun',
-          'version': '012020',
-          'cmd_name': 'Shogun v1.0.7',
+          'version': '072020',
+          'cmd_name': 'Shogun v1.0.8',
           'input_name': 'input',
+          'ignore_parameters': ['Number of threads'],
           'parent_artifact_name': 'Adapter trimmed files',
-          'parameters_names': ['wol_bowtie2', 'rep94_bowtie2']}
+          'parameters_names': ['wol_bowtie2', 'rep200_bowtie2']}
      ]},
     {'name': 'Target Gene Processing',
      'data_type': ['16S', '18S', 'ITS'],
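The point of 'ignore_parameters' is that values such as thread counts can change between deployments without making a job scientifically different, so they should not trigger a re-run. A minimal, dependency-free sketch of the comparison this enables (parameter name taken from the Shogun step above; the 'adapter' key is invented for illustration):

    old_run = {'input': '1234', 'adapter': 'GATCGGAAGAGC', 'Number of threads': '5'}
    new_run = {'input': '1234', 'adapter': 'GATCGGAAGAGC', 'Number of threads': '16'}

    def strip_ignored(params, ignore=('Number of threads',)):
        # drop parameters that should not count towards job identity
        return {k: v for k, v in params.items() if k not in ignore}

    assert old_run != new_run                                 # naive check: re-run
    assert strip_ignored(old_run) == strip_ignored(new_run)   # same logical job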
@@ -73,6 +77,7 @@ full_pipelines = [
           'version': '1.9.1',
           'cmd_name': 'Trimming',
           'input_name': 'input_data',
+          'ignore_parameters': [],
           'parent_artifact_name': None,
           'parameters_names': ['90 base pairs',
                                '100 base pairs',
@@ -83,13 +88,15 @@ full_pipelines = [
           'version': '1.9.1',
           'cmd_name': 'Pick closed-reference OTUs',
           'input_name': 'input_data',
+          'ignore_parameters': [],
           'parent_artifact_name': 'Trimmed Demultiplexed',
           'parameters_names': ['Defaults - parallel']},
          {'previous-step': 'Trimming',
           'plugin': 'deblur',
           'version': '1.1.0',
           'cmd_name': 'Deblur',
           'input_name': 'Demultiplexed sequences',
+          'ignore_parameters': [],
           'parent_artifact_name': 'Trimmed Demultiplexed',
           'parameters_names': ['Defaults']}
      ]},
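Since every later step locates its parent through 'previous-step', and the matching is by exact command name (note the versioned names like 'Atropos v1.1.24'), a typo or a version bump in one place silently orphans the child step. A small sanity check one could add, sketched against the dict layout above:

    def check_step_links(pipeline):
        # every 'previous-step' must name a step defined earlier in the list
        seen = set()
        for step in pipeline['steps']:
            prev = step['previous-step']
            if prev is not None and prev not in seen:
                raise ValueError('%s points at unknown step %s'
                                 % (step['cmd_name'], prev))
            seen.add(step['cmd_name'])

    for pipeline in full_pipelines:
        check_step_links(pipeline)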
@@ -122,6 +129,22 @@ def _check_requirements(requirements, template):
      return satisfied


+ def _check_parameters(jobs, cmd):
+     params = [{k: str(v) for k, v in j.parameters.values.items()
+                if k not in cmd['ignore_parameters']} for j in jobs]
+     return params
+
+
+ def _submit_workflows(artifact_process):
+     for artifact in artifact_process:
+         if artifact['workflow'] is None:
+             continue
+         # graph.nodes() returns the first job created in position [0]
+         first_job = list(artifact['workflow'].graph.nodes())[0]
+         if first_job.status == 'in_construction':
+             artifact['workflow'].submit()
+
+
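_check_parameters only touches the job.parameters.values attribute chain, so it can be exercised without a live Qiita database by faking that chain; the values below are invented for illustration:

    from types import SimpleNamespace

    fake_job = SimpleNamespace(parameters=SimpleNamespace(
        values={'input': 1234, 'Number of threads': 16}))
    fake_cmd = {'ignore_parameters': ['Number of threads']}

    print(_check_parameters([fake_job], fake_cmd))
    # [{'input': '1234'}] -- ignored key dropped, values normalized to str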
  # Step 1. Loop over the full_pipelines to process each step
  for pipeline in full_pipelines:
      # Step 2. From the steps generate the list of commands to add to the
@@ -149,6 +172,7 @@ for pipeline in full_pipelines:
               'previous-step': step['previous-step'],
               'parent_artifact_name': step['parent_artifact_name'],
               'input_name': step['input_name'],
+              'ignore_parameters': step['ignore_parameters'],
               'parameters': parameters})

      # Step 2. - for children. Get their commands. We currently only support
@@ -161,7 +185,9 @@ for pipeline in full_pipelines:
                       if c['previous-step'] == commands[0]['command-name']]

      # Step 3. Find all preparations/artifacts that we can add the pipeline
-     artifacts_all = [a for study in Study.iter()
+     # ... as a first pass we will only process study 10317 (AGP) ...
+     # artifacts_all = [a for study in Study.iter()
+     artifacts_all = [a for study in [Study(10317)]
                       # loop over all artifacts of artifact_type within the study
                       for a in study.artifacts(
                           artifact_type=pipeline['artifact_type'])
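If the allow-list ever grows beyond AGP, one possible follow-up is keeping the toggle in one place; a hypothetical variant (PILOT_STUDIES is an invented name; Study and study.artifacts are the qiita_db calls already used here):

    PILOT_STUDIES = [10317]  # empty list means: process every study

    studies = ([Study(sid) for sid in PILOT_STUDIES]
               if PILOT_STUDIES else Study.iter())
    artifacts_all = [a for study in studies
                     for a in study.artifacts(
                         artifact_type=pipeline['artifact_type'])]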
@@ -172,7 +198,10 @@ for pipeline in full_pipelines:
      artifacts_compliant = []
      for a in artifacts_all:
          st = a.study.sample_template
-         pt = a.prep_templates[0]
+         pts = a.prep_templates
+         if not pts:
+             continue
+         pt = pts[0]

          # {'sandbox', 'awaiting_approval', 'private', 'public'}
          if a.visibility in ('sandbox', 'awaiting_approval'):
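The new guard matters because indexing an empty prep-template list raises IndexError and would abort the whole sweep; skipping the artifact is the intended behavior. In plain Python:

    for pts in ([], ['prep1']):  # artifact without, then with, prep templates
        if not pts:
            continue             # old code did pts[0] here and raised IndexError
        print('processing', pts[0])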
@@ -194,23 +223,29 @@ for pipeline in full_pipelines:
      # of Step 4 but for debugging it makes sense to separate
      artifact_process = []
      children_compliant = []
+     cmd = commands[0]
      for a in artifacts_compliant:
-         cmd = commands[0]
          # getting all jobs, including hidden ones, in case a job failed
          jobs = a.jobs(cmd=cmd['command'], show_hidden=True)
-         params = [j.parameters.values for j in jobs]
+         params = _check_parameters(jobs, cmd)

          # checking that all required parameters of this command exist
          missing_parameters = []
          for p in cmd['parameters']:
              p = p['values']
              p.update({cmd['input_name']: str(a.id)})
-             if p not in params:
+             p_to_compare = p.copy()
+             for k in cmd['ignore_parameters']:
+                 del p_to_compare[k]
+             if p_to_compare not in params:
                  missing_parameters.append(p)
              else:
                  for c in a.children:
-                     if c.processing_parameters.values == p:
-                         children_compliant.append(c)
+                     cpp = c.processing_parameters
+                     if cpp.command.name == cmd['command-name']:
+                         cparams = _check_parameters([cpp], cmd)
+                         if cparams == p_to_compare:
+                             children_compliant.append(c)
          if missing_parameters:
              # note that we are building a dict for each artifact so we can
              # save the workflow id, useful for when we run this in a terminal
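For readability, this is roughly the record appended per artifact; the exact keys beyond 'workflow' and 'missing_parameters' (the two read later in the script) are an assumption, since the append itself falls outside this hunk:

    artifact_process.append({
        'artifact': a,                             # assumed key for the artifact
        'missing_parameters': missing_parameters,  # parameter sets still to run
        'workflow': None})                         # set once the first job exists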
@@ -224,14 +259,18 @@ for pipeline in full_pipelines:
          for cmd_id, cmd in enumerate(children_cmds):
              # getting all jobs, including hidden ones, in case a job failed
              jobs = a.jobs(cmd=cmd['command'], show_hidden=True)
-             params = [j.parameters.values for j in jobs]
+             params = _check_parameters(jobs, cmd)

              # checking that all required parameters of this command exist
              missing_parameters = []
              for p in cmd['parameters']:
                  p = p['values']
-                 p.update({cmd['input_name']: str(c.id)})
-                 if p not in params:
+                 p.update({cmd['input_name']: str(a.id)})
+                 p_to_compare = p.copy()
+                 for k in cmd['ignore_parameters']:
+                     del p_to_compare[k]
+
+                 if p_to_compare not in params:
                      missing_parameters.append(p)
              if missing_parameters:
                  artifact_process.append(
@@ -266,9 +305,9 @@ for pipeline in full_pipelines:
              # now we can add the rest of the parameters to the workflow for
              # the first command
              for params in artifact['missing_parameters'][1:]:
-                 params.update({cmd['input_name']: str(a.id)})
                  job_params = Parameters.load(cmd['command'], values_dict=params)
-                 artifact['workflow'].add(job_params)
+                 artifact['workflow'].add(
+                     job_params, req_params={cmd['input_name']: str(a.id)})

              for cmd in commands[cmd_id + 1:]:
                  # get jobs from the workflow to which we can add this new command
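For the downstream commands, the same add() call wires each new job to its parent's output rather than to the artifact id; a sketch assuming the signature used in this diff, where parent_job stands in for a job already in the workflow:

    job_params = Parameters.load(cmd['command'], values_dict=params)
    artifact['workflow'].add(
        job_params, connections={parent_job: {
            cmd['parent_artifact_name']: cmd['input_name']}})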
@@ -286,10 +325,4 @@ for pipeline in full_pipelines:
                          cmd['parent_artifact_name']: cmd['input_name']}})

      # Step 7. submit the workflows!
-     for artifact in artifact_process:
-         if artifact['workflow'] is None:
-             continue
-         # graph.nodes() returns the first job created in position [0]
-         first_job = list(artifact['workflow'].graph.nodes())[0]
-         if first_job.status == 'in_construction':
-             artifact['workflow'].submit()
+     _submit_workflows(artifact_process)