diff --git a/qiita_ware/ebi.py b/qiita_ware/ebi.py
index a4fd8b018..489f0234e 100644
--- a/qiita_ware/ebi.py
+++ b/qiita_ware/ebi.py
@@ -356,6 +356,11 @@ def generate_study_xml(self):
         study_title = ET.SubElement(descriptor, 'STUDY_TITLE')
         study_title.text = escape(clean_whitespace(self.study_title))
 
+        # study type is deprecated and not displayed anywhere on EBI-ENA;
+        # however it's required for submission so just injecting with Other
+        ET.SubElement(
+            descriptor, 'STUDY_TYPE', {'existing_study_type': 'Other'})
+
         study_abstract = ET.SubElement(descriptor, 'STUDY_ABSTRACT')
         study_abstract.text = clean_whitespace(escape(self.study_abstract))
 
diff --git a/qiita_ware/test/test_ebi.py b/qiita_ware/test/test_ebi.py
index da784e0d8..28cf12b95 100644
--- a/qiita_ware/test/test_ebi.py
+++ b/qiita_ware/test/test_ebi.py
@@ -1314,6 +1314,7 @@ def test_parse_EBI_reply(self):
         Identification of the Microbiomes for Cannabis Soils
+
         This is a preliminary study to examine the microbiota associated with \
 the Cannabis plant. Soils samples from the bulk soil, soil associated with \
 
diff --git a/scripts/qiita-auto-processing b/scripts/qiita-auto-processing
index a84990a8f..e3dcb2f39 100644
--- a/scripts/qiita-auto-processing
+++ b/scripts/qiita-auto-processing
@@ -33,6 +33,7 @@ user = User('antoniog@ucsd.edu')
 #  'version': the version of the plugin,
 #  'cmd_name': the command we want to run,
 #  'input_name': the name of the input parameter of that command
+#  'ignore_parameters': list of parameters to ignore, for example: threads
 #  'parent_artifact_name': name of the parent output, input for this command
 #  'parameters_names': list of the names of the parameter sets we want to run
 # }
@@ -41,21 +42,24 @@ full_pipelines = [
      'data_type': ['Metagenomic'],
      'artifact_type': 'per_sample_FASTQ',
      'previous-step': None,
+     'requirements': dict(),
      'steps': [
         {'previous-step': None,
          'plugin': 'qp-shogun',
          'version': '012020',
          'cmd_name': 'Atropos v1.1.24',
          'input_name': 'input',
+         'ignore_parameters': ['Number of threads used'],
          'parent_artifact_name': None,
          'parameters_names': ['KAPA HyperPlus with iTru']},
         {'previous-step': 'Atropos v1.1.24',
          'plugin': 'qp-shogun',
-         'version': '012020',
-         'cmd_name': 'Shogun v1.0.7',
+         'version': '072020',
+         'cmd_name': 'Shogun v1.0.8',
          'input_name': 'input',
+         'ignore_parameters': ['Number of threads'],
          'parent_artifact_name': 'Adapter trimmed files',
-         'parameters_names': ['wol_bowtie2', 'rep94_bowtie2']}
+         'parameters_names': ['wol_bowtie2', 'rep200_bowtie2']}
     ]},
     {'name': 'Target Gene Processing',
      'data_type': ['16S', '18S', 'ITS'],
@@ -73,6 +77,7 @@ full_pipelines = [
          'version': '1.9.1',
          'cmd_name': 'Trimming',
          'input_name': 'input_data',
+         'ignore_parameters': [],
          'parent_artifact_name': None,
          'parameters_names': ['90 base pairs',
                               '100 base pairs',
@@ -83,6 +88,7 @@ full_pipelines = [
          'version': '1.9.1',
          'cmd_name': 'Pick closed-reference OTUs',
          'input_name': 'input_data',
+         'ignore_parameters': [],
          'parent_artifact_name': 'Trimmed Demultiplexed',
          'parameters_names': ['Defaults - parallel']},
         {'previous-step': 'Trimming',
@@ -90,6 +96,7 @@ full_pipelines = [
          'version': '1.1.0',
          'cmd_name': 'Deblur',
          'input_name': 'Demultiplexed sequences',
+         'ignore_parameters': [],
          'parent_artifact_name': 'Trimmed Demultiplexed',
          'parameters_names': ['Defaults']}
     ]},
@@ -122,6 +129,22 @@ def _check_requirements(requirements, template):
     return satisfied
 
 
+def _check_parameters(jobs, cmd):
+    params = [{k: str(v) for k, v in j.parameters.values.items()
+               if k not in cmd['ignore_parameters']} for j in jobs]
+    return params
+
+
+def _submit_workflows(artifact_process):
+    for artifact in artifact_process:
+        if artifact['workflow'] is None:
+            continue
+        # nodes will return in position [0] the first job created
+        first_job = list(artifact['workflow'].graph.nodes())[0]
+        if first_job.status == 'in_construction':
+            artifact['workflow'].submit()
+
+
 # Step 1. Loop over the full_pipelines to process each step
 for pipeline in full_pipelines:
     # Step 2. From the steps generate the list of commands to add to the
@@ -149,6 +172,7 @@ for pipeline in full_pipelines:
             'previous-step': step['previous-step'],
             'parent_artifact_name': step['parent_artifact_name'],
             'input_name': step['input_name'],
+            'ignore_parameters': step['ignore_parameters'],
             'parameters': parameters})
 
     # Step 2. - for children. Get their commands. We currently only support
@@ -161,7 +185,9 @@ for pipeline in full_pipelines:
                 if c['previous-step'] == commands[0]['command-name']]
 
     # Step 3. Find all preparations/artifacts that we can add the pipeline
-    artifacts_all = [a for study in Study.iter()
+    # ... as a first pass we will only process study 10317 (AGP) ...
+    # artifacts_all = [a for study in Study.iter()
+    artifacts_all = [a for study in [Study(10317)]
                      # loop over all artifacts of artifact_type with in study
                      for a in study.artifacts(
                          artifact_type=pipeline['artifact_type'])
@@ -172,7 +198,10 @@ for pipeline in full_pipelines:
     artifacts_compliant = []
     for a in artifacts_all:
         st = a.study.sample_template
-        pt = a.prep_templates[0]
+        pts = a.prep_templates
+        if not pts:
+            continue
+        pt = pts[0]
 
         # {'sandbox', 'awaiting_approval', 'private', 'public'}
         if a.visibility in ('sandbox', 'awaiting_approval'):
@@ -194,23 +223,29 @@ for pipeline in full_pipelines:
     # of Step 4 but for debugging it makes sense to separate
     artifact_process = []
     children_compliant = []
+    cmd = commands[0]
     for a in artifacts_compliant:
-        cmd = commands[0]
         # getting all jobs, includen hiddens, in case the job failed
         jobs = a.jobs(cmd=cmd['command'], show_hidden=True)
-        params = [j.parameters.values for j in jobs]
+        params = _check_parameters(jobs, cmd)
 
         # checking that all required parameters of this command exist
         missing_parameters = []
         for p in cmd['parameters']:
             p = p['values']
             p.update({cmd['input_name']: str(a.id)})
-            if p not in params:
+            p_to_compare = p.copy()
+            for k in cmd['ignore_parameters']:
+                del p_to_compare[k]
+            if p_to_compare not in params:
                 missing_parameters.append(p)
             else:
                 for c in a.children:
-                    if c.processing_parameters.values == p:
-                        children_compliant.append(c)
+                    cpp = c.processing_parameters
+                    if cpp.command.name == cmd['command-name']:
+                        cparams = _check_parameters([cpp], cmd)
+                        if cparams == p_to_compare:
+                            children_compliant.append(c)
         if missing_parameters:
             # note that we are building a dict for each artifact so we can
             # save the workflow id, useful for when we run this in a terminal
@@ -224,14 +259,18 @@ for pipeline in full_pipelines:
         for cmd_id, cmd in enumerate(children_cmds):
             # getting all jobs, includen hiddens, in case the job failed
             jobs = a.jobs(cmd=cmd['command'], show_hidden=True)
-            params = [j.parameters.values for j in jobs]
+            params = _check_parameters(jobs, cmd)
 
             # checking that all required parameters of this command exist
            missing_parameters = []
            for p in cmd['parameters']:
                p = p['values']
-                p.update({cmd['input_name']: str(c.id)})
-                if p not in params:
+                p.update({cmd['input_name']: str(a.id)})
+                p_to_compare = p.copy()
+                for k in cmd['ignore_parameters']:
+                    del p_to_compare[k]
+
+                if p_to_compare not in params:
                    missing_parameters.append(p)
            if missing_parameters:
                artifact_process.append(
@@ -266,9 +305,9 @@ for pipeline in full_pipelines:
         # now we can add the rest of the parameters to the workflow for
         # the first command
         for params in artifact['missing_parameters'][1:]:
-            params.update({cmd['input_name']: str(a.id)})
             job_params = Parameters.load(cmd['command'], values_dict=params)
-            artifact['workflow'].add(job_params)
+            artifact['workflow'].add(
+                job_params, req_params={cmd['input_name']: str(a.id)})
 
         for cmd in commands[cmd_id + 1:]:
             # get jobs from the workflow to which we can add this new command
@@ -286,10 +325,4 @@ for pipeline in full_pipelines:
                         cmd['parent_artifact_name']: cmd['input_name']}})
 
 # Step 7. submit the workflows!
-for artifact in artifact_process:
-    if artifact['workflow'] is None:
-        continue
-    # nodes will return in position [0] the first job created
-    first_job = list(artifact['workflow'].graph.nodes())[0]
-    if first_job.status == 'in_construction':
-        artifact['workflow'].submit()
+_submit_workflows(artifact_process)
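For context on the ebi.py hunk: generate_study_xml() now always injects a STUDY_TYPE element because EBI-ENA still requires one at submission time even though the field is deprecated and never displayed. Below is a minimal, self-contained sketch of what that produces, using plain xml.etree.ElementTree with hard-coded sample values; it is not the actual EBISubmission class, only an illustration of the serialized output.

# Illustrative sketch only -- mirrors the new STUDY_TYPE injection, but uses
# hard-coded sample values instead of the EBISubmission attributes.
import xml.etree.ElementTree as ET

descriptor = ET.Element('DESCRIPTOR')

study_title = ET.SubElement(descriptor, 'STUDY_TITLE')
study_title.text = 'Identification of the Microbiomes for Cannabis Soils'

# deprecated on EBI-ENA but still required for submission, so always 'Other'
ET.SubElement(descriptor, 'STUDY_TYPE', {'existing_study_type': 'Other'})

study_abstract = ET.SubElement(descriptor, 'STUDY_ABSTRACT')
study_abstract.text = 'This is a preliminary study ...'

print(ET.tostring(descriptor, encoding='unicode'))
# <DESCRIPTOR><STUDY_TITLE>...</STUDY_TITLE>
# <STUDY_TYPE existing_study_type="Other" /><STUDY_ABSTRACT>...</STUDY_ABSTRACT>
# </DESCRIPTOR>  (printed as a single line)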
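And for the qiita-auto-processing changes: each step definition now carries an 'ignore_parameters' list so the "has this job already run with these parameters?" check skips values that legitimately vary between runs, such as thread counts. A rough standalone sketch of that comparison follows, using plain dicts in place of Qiita's job/parameter objects; the function and variable names here are illustrative, not the Qiita API.

# Sketch of the ignore_parameters-aware duplicate check; 'previous_jobs' stands
# in for artifact.jobs(...) and each entry is just a dict of parameter values.
def normalize(parameters, ignore_parameters):
    # stringify values and drop the keys we were told to ignore
    return {k: str(v) for k, v in parameters.items()
            if k not in ignore_parameters}

ignore = ['Number of threads']
previous_jobs = [
    {'input': '42', 'Database': 'wol_bowtie2', 'Number of threads': 16},
]
wanted = {'input': '42', 'Database': 'wol_bowtie2', 'Number of threads': 4}

existing = [normalize(j, ignore) for j in previous_jobs]

# thread counts differ, but the parameter set still counts as already run,
# so no duplicate job would be queued for it
print(normalize(wanted, ignore) in existing)  # True

The same normalization is applied to a child artifact's processing parameters (via _check_parameters([cpp], cmd)) when deciding whether an existing child already satisfies a requested parameter set.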