Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions qiita_db/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,13 +1040,16 @@ def _build_mapping_file(self, samples, rename_dup_samples=False):
with qdb.sql_connection.TRN:
all_ids = set()
to_concat = []
sample_infos = dict()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this line, but would it be a good time to rename this method to _build_sample_metadata?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, in practice it is still merging sample/prep info files so building mapping files ...

for aid, samps in samples.items():
pt = qdb.artifact.Artifact(aid).prep_templates[0]
qiime_map_fp = pt.qiime_map_fp
artifact = qdb.artifact.Artifact(aid)
si = artifact.study.sample_template
if si not in sample_infos:
sample_infos[si] = si.to_dataframe()
pt = artifact.prep_templates[0]
pt_df = pt.to_dataframe()

# Parse the mapping file
qm = qdb.metadata_template.util.load_template_to_dataframe(
qiime_map_fp, index='#SampleID')
qm = pt_df.join(sample_infos[si], lsuffix="_prep")

# if we are not going to merge the duplicated samples
# append the aid to the sample name
Expand Down Expand Up @@ -1076,15 +1079,6 @@ def _build_mapping_file(self, samples, rename_dup_samples=False):

merged_map = pd.concat(to_concat)

# forcing QIIME column order
cols = merged_map.columns.values.tolist()
cols.remove('BarcodeSequence')
cols.remove('LinkerPrimerSequence')
cols.remove('Description')
cols = (['BarcodeSequence', 'LinkerPrimerSequence'] + cols +
['Description'])
merged_map = merged_map[cols]

# Save the mapping file
_, base_fp = qdb.util.get_mountpoint(self._table)[0]
mapping_fp = join(base_fp, "%d_analysis_mapping.txt" % self._id)
Expand Down
10 changes: 7 additions & 3 deletions qiita_db/handlers/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,25 @@ def get(self, prep_id):
'investigation_type': prep info investigation type
'study': study that the prep info belongs to
'status': prep info status
'qiime-map': the path to the qiime mapping file
'sample-file': the path to the sample information file
'prep-file': the path to the prep info file
"""
with qdb.sql_connection.TRN:
pt = _get_prep_template(prep_id)
prep_files = [fp for _, fp in pt.get_filepaths()
if 'qiime' not in basename(fp)]
artifact = pt.artifact.id if pt.artifact is not None else None
sid = pt.study_id
response = {
'data_type': pt.data_type(),
'artifact': artifact,
'investigation_type': pt.investigation_type,
'study': pt.study_id,
'study': sid,
'status': pt.status,
'qiime-map': pt.qiime_map_fp,
# get_filepaths returns an ordered list of [filepath_id,
# filepath] pairs; we take the filepath of the first pair
'sample-file': qdb.study.Study(
sid).sample_template.get_filepaths()[0][1],
# The first element in the prep_files is the newest
# prep information file - hence the correct one
'prep-file': prep_files[0]
Expand Down
4 changes: 2 additions & 2 deletions qiita_db/handlers/tests/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def test_get(self):
self.assertEqual(obs['investigation_type'], 'Metagenomics')
self.assertEqual(obs['study'], 1)
self.assertEqual(obs['status'], 'private')
self.assertTrue(obs['qiime-map'].startswith(
path_builder('1_prep_1_qiime_')))
self.assertTrue(obs['sample-file'].startswith(
path_builder('1_')))
self.assertTrue(obs['prep-file'].startswith(
path_builder('1_prep_1_')))

Expand Down
113 changes: 0 additions & 113 deletions qiita_db/metadata_template/prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@
from os.path import join
from time import strftime
from copy import deepcopy
import warnings
from skbio.util import find_duplicates

import pandas as pd

from qiita_core.exceptions import IncompetentQiitaDeveloperError
import qiita_db as qdb
from .constants import (PREP_TEMPLATE_COLUMNS, TARGET_GENE_DATA_TYPES,
Expand Down Expand Up @@ -519,116 +516,6 @@ def generate_files(self, samples=None, columns=None):
fp_id = qdb.util.convert_to_id("prep_template", "filepath_type")
self.add_filepath(fp, fp_id=fp_id)

# creating QIIME mapping file
self.create_qiime_mapping_file()

def create_qiime_mapping_file(self):
    """Create the QIIME mapping file and link it in the db.

    The prep template is joined with the most recently retrieved sample
    template file of the study, the QIIME-required columns are renamed
    (or created with a placeholder), the columns are reordered to the
    QIIME layout, and the result is written as a tab-separated file in
    the 'templates' mountpoint and attached to this object with the
    "qiime_map" filepath type.

    Returns
    -------
    filepath : str
        The filepath of the created QIIME mapping file

    Raises
    ------
    ValueError
        If the prep template is not a subset of the sample template

    Warns
    -----
    QiitaDBWarning
        If the QIIME-required columns are not present in the template

    Notes
    -----
    We cannot ensure that the QIIME-required columns are present in the
    metadata map. However, we have to generate a QIIME-compliant mapping
    file. Since the user may need a QIIME mapping file, but not these
    QIIME-required columns, we are going to create them and
    populate them with the value XXQIITAXX.
    """
    with qdb.sql_connection.TRN:
        rename_cols = {
            'barcode': 'BarcodeSequence',
            'primer': 'LinkerPrimerSequence',
            'description': 'Description',
        }
        # Columns that must open the file, in this order
        leading_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

        if 'reverselinkerprimer' in self.categories():
            rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
            leading_cols.append('ReverseLinkerPrimer')

        # Retrieve the latest sample template.
        # The retrieval is sorted in descending order, so the first
        # result is the one we want; its 'fp' entry holds the filepath.
        sample_template_fp = qdb.util.retrieve_filepaths(
            "sample_template_filepath", "study_id", self.study_id,
            sort='descending')[0]['fp']

        # reading files via pandas
        st = qdb.metadata_template.util.load_template_to_dataframe(
            sample_template_fp)
        pt = self.to_dataframe()

        st_sample_names = set(st.index)
        pt_sample_names = set(pt.index)

        if not pt_sample_names.issubset(st_sample_names):
            raise ValueError(
                "Prep template is not a sub set of the sample template, "
                "file: %s - samples: %s"
                % (sample_template_fp,
                   ', '.join(pt_sample_names-st_sample_names)))

        # Overlapping column names coming from the prep template get the
        # "_prep" suffix so the sample template ones keep their name
        mapping = pt.join(st, lsuffix="_prep")
        mapping.rename(columns=rename_cols, inplace=True)

        # Pre-populate the QIIME-required columns with the value XXQIITAXX
        index = mapping.index
        placeholder = ['XXQIITAXX'] * len(index)
        missing = []
        for val in rename_cols.values():
            if val not in mapping:
                missing.append(val)
                mapping[val] = pd.Series(placeholder, index=index)

        if missing:
            warnings.warn(
                "Some columns required to generate a QIIME-compliant "
                "mapping file are not present in the template. A "
                "placeholder value (XXQIITAXX) has been used to populate "
                "these columns. Missing columns: %s"
                % ', '.join(sorted(missing)),
                qdb.exceptions.QiitaDBWarning)

        # Get the original mapping columns and readjust the order to
        # comply with QIIME requirements: leading columns first,
        # 'Description' last, everything else in between. Every pinned
        # column (including 'ReverseLinkerPrimer' when present) is
        # excluded from the middle section so that no label is selected
        # twice, which would duplicate the column in the output file.
        pinned = set(leading_cols)
        pinned.add('Description')
        middle_cols = [col for col in mapping.columns.values.tolist()
                       if col not in pinned]
        mapping = mapping[leading_cols + middle_cols + ['Description']]

        # figuring out the filepath for the QIIME map file
        _, templates_dir = qdb.util.get_mountpoint('templates')[0]
        filepath = join(
            templates_dir,
            '%d_prep_%d_qiime_%s.txt' % (self.study_id, self.id,
                                         strftime("%Y%m%d-%H%M%S")))

        # Save the mapping file
        mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
                       sep='\t', encoding='utf-8')

        # adding the fp to the object
        self.add_filepath(
            filepath,
            fp_id=qdb.util.convert_to_id("qiime_map", "filepath_type"))

        return filepath

@property
def status(self):
"""The status of the prep template
Expand Down
31 changes: 6 additions & 25 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,9 +954,9 @@ def _common_creation_checks(self, pt, fp_count, name):
for s_id in exp_sample_ids:
self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id])

# prep and qiime files have been created
# prep files have been created
filepaths = pt.get_filepaths()
self.assertEqual(len(filepaths), 2)
self.assertEqual(len(filepaths), 1)

def test_validate_restrictions(self):
PT = qdb.metadata_template.prep_template.PrepTemplate
Expand Down Expand Up @@ -1019,28 +1019,9 @@ def test_generate_files(self):
fp_count = qdb.util.get_count("qiita.filepath")
self.tester.generate_files()
obs = qdb.util.get_count("qiita.filepath")
# We just make sure that the count has been increased by 2, since
# We just make sure that the count has been increased by 1, since
# the contents of the files have been tested elsewhere.
self.assertEqual(obs, fp_count + 2)

def test_create_qiime_mapping_file(self):
    """The generated QIIME mapping file matches the reference file."""
    def _read_with_sorted_columns(path):
        # Load a mapping file without any date parsing and put its
        # columns in alphabetical order so both frames line up
        frame = pd.read_csv(
            path, sep='\t', infer_datetime_format=False,
            parse_dates=False, index_col=False, comment='\t')
        return frame.reindex(sorted(frame.columns), axis=1)

    prep = qdb.metadata_template.prep_template.PrepTemplate(1)

    # directory in which the template files live
    templates_dir = qdb.util.get_mountpoint('templates')[0][1]

    # creating the QIIME mapping file
    created_fp = prep.create_qiime_mapping_file()
    reference_fp = join(templates_dir,
                        '1_prep_1_qiime_19700101-000000.txt')

    observed = _read_with_sorted_columns(created_fp)
    expected = _read_with_sorted_columns(reference_fp)

    assert_frame_equal(observed, expected, check_like=True)
self.assertEqual(obs, fp_count + 1)

def test_create_data_type_id(self):
"""Creates a new PrepTemplate passing the data_type_id"""
Expand Down Expand Up @@ -1119,9 +1100,9 @@ def test_create_warning(self):
for s_id in exp_sample_ids:
self.assertEqual(pt[s_id]._to_dict(), exp_dict[s_id])

# prep and qiime files have been created
# prep files have been created
filepaths = pt.get_filepaths()
self.assertEqual(len(filepaths), 2)
self.assertEqual(len(filepaths), 1)

# cleaning
qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id)
Expand Down
4 changes: 2 additions & 2 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,9 +1365,9 @@ def test_generate_files(self):
fp_count = qdb.util.get_count("qiita.filepath")
self.tester.generate_files()
obs = qdb.util.get_count("qiita.filepath")
# We just make sure that the count has been increased by 6, since
# We just make sure that the count has been increased by 3, since
# the contents of the files have been tested elsewhere.
self.assertEqual(obs, fp_count + 5)
self.assertEqual(obs, fp_count + 3)

def test_to_file(self):
"""to file writes a tab delimited file with all the metadata"""
Expand Down
Loading