Skip to content

Commit 74404c4

Browse files
/api/v1/study/1/samples accepts new categories (#3235)
* /api/v1/study/1/samples accepts new categories calling PATCH on /api/v1/study/1/samples now silently accepts metadata with new categories in them when updating a sample or adding a new one. Existing samples will be assigned a null value while samples in the metadata will be assigned the value or empty string if one was not given. * Fixed status code When a PATCH includes new samples as well as existing ones, 201 should be returned on success, rather than 200. * Fixed tests Tests ran fine individually, but failed when ran as a whole suite because categories and samples added for one test would affect tests downstream. * Remove indeterminate state causing condition from REST call.
1 parent 9ceadff commit 74404c4

File tree

2 files changed

+214
-35
lines changed

2 files changed

+214
-35
lines changed

qiita_pet/handlers/rest/study_samples.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,12 @@ def patch(self, study_id):
168168

169169
categories = set(study.sample_template.categories)
170170

171-
if set(data.columns) != categories:
172-
if set(data.columns).issubset(categories):
173-
self.fail('Not all sample information categories provided',
174-
400)
175-
else:
176-
unknown = set(data.columns) - categories
177-
self.fail("Some categories do not exist in the sample "
178-
"information", 400,
179-
categories_not_found=sorted(unknown))
171+
# issuperset() will return True for true supersets or exact matches.
172+
# In either case, keep processing. Subsets of categories remain
173+
# invalid, however.
174+
if not set(data.columns).issuperset(categories):
175+
self.fail('Not all sample information categories provided',
176+
400)
180177
return
181178

182179
existing_samples = set(sample_info.index)
@@ -186,16 +183,22 @@ def patch(self, study_id):
186183

187184
# warnings generated are not currently caught
188185
# see https://github.com/biocore/qiita/issues/2096
189-
if overlapping_ids:
190-
to_update = data.loc[overlapping_ids]
191-
study.sample_template.update(to_update)
192-
status = 200
193186

187+
# processing new_ids first allows us to extend the sample_template
188+
# w/new columns before calling update(). update() will return an
189+
# error if unexpected columns are found.
194190
if new_ids:
195191
to_extend = data.loc[new_ids]
196192
study.sample_template.extend(to_extend)
197193
status = 201
198194

195+
if overlapping_ids:
196+
to_update = data.loc[overlapping_ids]
197+
study.sample_template.update(to_update)
198+
if status == 500:
199+
# don't overwrite a possible status = 201
200+
status = 200
201+
199202
self.set_status(status)
200203
self.finish()
201204

qiita_pet/test/rest/test_study_samples.py

Lines changed: 198 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#
66
# The full license is in the file LICENSE, distributed with this software.
77
# -----------------------------------------------------------------------------
8-
8+
import json
99
from unittest import main
1010
from datetime import datetime
1111

@@ -16,23 +16,141 @@
1616
from qiita_pet.test.rest.test_base import RESTHandlerTestCase
1717

1818

19-
def _sample_creator(ids):
20-
categories = ['season_environment', 'env_package',
21-
'assigned_from_geo', 'texture', 'taxon_id',
22-
'depth', 'host_taxid', 'common_name',
23-
'water_content_soil', 'elevation', 'temp',
24-
'tot_nitro', 'samp_salinity', 'altitude',
25-
'env_biome', 'country', 'ph', 'anonymized_name',
26-
'tot_org_carb', 'description_duplicate',
27-
'env_feature', 'physical_specimen_location',
28-
'physical_specimen_remaining', 'dna_extracted',
29-
'sample_type', 'collection_timestamp',
30-
'host_subject_id', 'description',
31-
'latitude', 'longitude', 'scientific_name']
19+
def _sample_creator(ids, categories=None):
20+
if categories is None:
21+
categories = ['season_environment', 'env_package',
22+
'assigned_from_geo', 'texture', 'taxon_id',
23+
'depth', 'host_taxid', 'common_name',
24+
'water_content_soil', 'elevation', 'temp',
25+
'tot_nitro', 'samp_salinity', 'altitude',
26+
'env_biome', 'country', 'ph', 'anonymized_name',
27+
'tot_org_carb', 'description_duplicate',
28+
'env_feature', 'physical_specimen_location',
29+
'physical_specimen_remaining', 'dna_extracted',
30+
'sample_type', 'collection_timestamp',
31+
'host_subject_id', 'description',
32+
'latitude', 'longitude', 'scientific_name']
3233
return {i: {c: 1 for c in categories} for i in ids}
3334

3435

3536
class StudySamplesHandlerTests(RESTHandlerTestCase):
37+
def _get_sample_categories(self, study_id):
38+
response = self.get('/api/v1/study/1/samples/info',
39+
headers=self.headers)
40+
self.assertEqual(response.code, 200)
41+
obs = json_decode(response.body)
42+
return obs['categories']
43+
44+
def test_patch_accept_new_categories(self):
45+
body = {'1.SKM1.999998': {'dna_extracted': 'foo',
46+
'host_taxid': 'foo',
47+
'altitude': 'foo',
48+
'description_duplicate': 'foo',
49+
'temp': 'foo',
50+
'country': 'foo',
51+
'texture': 'foo',
52+
'latitude': '32.7157',
53+
'assigned_from_geo': 'foo',
54+
'tot_org_carb': 'foo',
55+
'env_feature': 'foo',
56+
'depth': 'foo',
57+
'tot_nitro': 'foo',
58+
'anonymized_name': 'foo',
59+
'scientific_name': 'foo',
60+
'samp_salinity': 'foo',
61+
'ph': 'foo',
62+
'taxon_id': '9999',
63+
'season_environment': 'foo',
64+
'physical_specimen_remaining': 'foo',
65+
'host_subject_id': 'foo',
66+
'water_content_soil': 'foo',
67+
'env_biome': 'foo',
68+
'env_package': 'foo',
69+
'elevation': 'foo',
70+
'collection_timestamp': ('2014-05-29 '
71+
'12:24:15'),
72+
'sample_type': 'foo',
73+
'physical_specimen_location': 'foo',
74+
'longitude': '117.1611',
75+
'common_name': 'foo',
76+
'description': 'foo'}}
77+
78+
# first, confirm this should patch successfully: all fields present
79+
# note that response is 201 if using patch to add new samples, 200 if
80+
# updating existing samples.
81+
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
82+
data=body, asjson=True)
83+
self.assertEqual(response.code, 201)
84+
85+
body = {'1.SKM1.999999': {'dna_extracted': 'foo',
86+
'host_taxid': 'foo',
87+
'altitude': 'foo',
88+
'description_duplicate': 'foo',
89+
'temp': 'foo',
90+
'country': 'foo',
91+
'texture': 'foo',
92+
'latitude': '32.7157',
93+
'assigned_from_geo': 'foo',
94+
'tot_org_carb': 'foo',
95+
'env_feature': 'foo',
96+
'depth': 'foo',
97+
'tot_nitro': 'foo',
98+
'anonymized_name': 'foo',
99+
'scientific_name': 'foo',
100+
'samp_salinity': 'foo',
101+
'ph': 'foo',
102+
'taxon_id': '9999',
103+
'season_environment': 'foo',
104+
'physical_specimen_remaining': 'foo',
105+
'host_subject_id': 'foo',
106+
'water_content_soil': 'foo',
107+
'env_biome': 'foo',
108+
'env_package': 'foo',
109+
'elevation': 'foo',
110+
'collection_timestamp': ('2014-05-29 '
111+
'12:24:15'),
112+
'sample_type': 'foo',
113+
'physical_specimen_location': 'foo',
114+
'longitude': '117.1611',
115+
'common_name': 'foo',
116+
'description': 'foo'}}
117+
118+
# add a new field to one sample_id, making body a superset of values.
119+
body['1.SKM1.999999']['new_field1'] = 'some_value'
120+
body['1.SKM1.999999']['new_field2'] = 'another_value'
121+
122+
# this test should pass.
123+
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
124+
data=body, asjson=True)
125+
self.assertEqual(response.code, 201)
126+
127+
# confirm new samples were added.
128+
response = self.get('/api/v1/study/1/samples', headers=self.headers)
129+
self.assertEqual(response.code, 200)
130+
obs = json_decode(response.body)
131+
self.assertIn('1.SKM1.999998', obs)
132+
self.assertIn('1.SKM1.999999', obs)
133+
134+
# confirm new categories are a part of the samples.
135+
response = self.get('/api/v1/study/1/samples/info',
136+
headers=self.headers)
137+
self.assertEqual(response.code, 200)
138+
obs = json_decode(response.body)
139+
self.assertIn('new_field1', obs['categories'])
140+
self.assertIn('new_field2', obs['categories'])
141+
142+
# remove a few existing fields, representing retired fields.
143+
for sample_id in body:
144+
del (body[sample_id]['ph'])
145+
del (body[sample_id]['water_content_soil'])
146+
147+
exp = {'message': 'Not all sample information categories provided'}
148+
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
149+
data=body, asjson=True)
150+
self.assertEqual(response.code, 400)
151+
obs = json_decode(response.body)
152+
self.assertEqual(obs, exp)
153+
36154
def test_patch_no_study(self):
37155
body = {'sampleid1': {'category_a': 'value_a'},
38156
'sampleid2': {'category_b': 'value_b'}}
@@ -91,21 +209,75 @@ def test_patch_sample_ids_exist_incomplete_metadata(self):
91209
self.assertEqual(obs, exp)
92210

93211
def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self):
94-
body = _sample_creator(['1.SKM8.640201', 'blank.a1'])
212+
current = self._get_sample_categories(1)
213+
# If no new categories, both new and existing samples should succeed.
214+
# 640201 is an existing sample. blank.a1 is a new sample
215+
body = _sample_creator(['1.SKM8.640201',
216+
'blank.a1'], categories=current)
217+
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
218+
data=body, asjson=True)
219+
self.assertEqual(response.code, 201)
220+
# successful response should be empty string
221+
self.assertEqual(response.body, b'')
222+
223+
# If new categories are added, patch() should succeed.
224+
# New and existing samples should have new categories.
225+
# 640201 is an existing sample. blank.a2 is a new sample
226+
body = _sample_creator(['1.SKM8.640201',
227+
'blank.a2'], categories=current)
228+
# body['blank.a2']['DOES_NOT_EXIST'] will be '', not None.
229+
# body['1.SKM8.640201']['WHAT'] will be '', not None.
95230
body['1.SKM8.640201']['DOES_NOT_EXIST'] = 'foo'
96-
body['blank.a1']['WHAT'] = 'bar'
231+
body['blank.a2']['WHAT'] = 'bar'
232+
233+
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
234+
data=body, asjson=True)
235+
self.assertEqual(response.code, 201)
236+
# successful response should be empty string
237+
self.assertEqual(response.body, b'')
238+
239+
response = self.get(('/api/v1/study/1/samples/categories='
240+
'does_not_exist,what'), headers=self.headers)
241+
self.assertEqual(response.code, 200)
97242

98-
exp = {'message': "Some categories do not exist in the sample "
99-
"information",
100-
'categories_not_found': ['DOES_NOT_EXIST', 'WHAT']}
243+
# decode results manually from bytes, replacing non-JSON-spec 'NaN'
244+
# values with JSON-spec 'null'. These will convert to Python None
245+
# values when load()ed.
246+
obs = response.body.decode("utf-8").replace('NaN', 'null')
247+
obs = json.loads(obs)
248+
249+
self.assertEqual(obs['header'], ['does_not_exist', 'what'])
250+
251+
self.assertEqual(obs['samples']['1.blank.a2'], ['', 'bar'])
252+
self.assertEqual(obs['samples']['1.SKM8.640201'], ['foo', ''])
253+
254+
# as the number and names of samples is dynamic, simply confirm the
255+
# other samples are unchanged.
256+
for sample in obs['samples']:
257+
if sample not in ['1.blank.a2', '1.SKM8.640201']:
258+
print(sample)
259+
self.assertEqual(obs['samples'][sample], [None, None])
260+
261+
# If categories were removed, both existing and new samples should
262+
# fail.
263+
# 640201 is an existing sample. blank.a3 is a new sample
264+
current = self._get_sample_categories(1)
265+
body = _sample_creator(['1.SKM8.640201',
266+
'blank.a3'], categories=current)
267+
del (body['1.SKM8.640201']['env_biome'])
268+
del (body['blank.a3']['env_biome'])
269+
270+
exp = {'message': 'Not all sample information categories provided'}
101271
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
102272
data=body, asjson=True)
103273
self.assertEqual(response.code, 400)
104274
obs = json_decode(response.body)
105275
self.assertEqual(obs, exp)
106276

107277
def test_patch_sample_ids_already_exist(self):
108-
body = _sample_creator(['1.SKM8.640201', '1.SKM3.640197'])
278+
current = self._get_sample_categories(1)
279+
body = _sample_creator(['1.SKM8.640201',
280+
'1.SKM3.640197'], categories=current)
109281
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
110282
data=body, asjson=True)
111283
self.assertEqual(response.code, 200)
@@ -117,7 +289,8 @@ def test_patch_sample_ids_already_exist(self):
117289
self.assertNotEqual(df.loc['1.SKM4.640180']['elevation'], '1')
118290

119291
def test_patch_sample_ids_do_not_exist(self):
120-
body = _sample_creator(['blank.a1'])
292+
current = self._get_sample_categories(1)
293+
body = _sample_creator(['blank.a1'], categories=current)
121294
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
122295
data=body, asjson=True)
123296
self.assertEqual(response.code, 201)
@@ -126,7 +299,10 @@ def test_patch_sample_ids_do_not_exist(self):
126299
self.assertEqual(df.loc['1.blank.a1']['elevation'], '1')
127300

128301
def test_patch_sample_ids_partially_exist(self):
129-
body = _sample_creator(['blank.b1', '1.SKM5.640177', '1.SKB9.640200'])
302+
current = self._get_sample_categories(1)
303+
body = _sample_creator(['blank.b1',
304+
'1.SKM5.640177',
305+
'1.SKB9.640200'], categories=current)
130306
response = self.patch('/api/v1/study/1/samples', headers=self.headers,
131307
data=body, asjson=True)
132308
self.assertEqual(response.code, 201)

0 commit comments

Comments
 (0)