From 8c39e79cfb4ad87eff815d46eb3741f106428b01 Mon Sep 17 00:00:00 2001 From: Charles Cowart Date: Fri, 20 Jan 2023 18:10:42 -0800 Subject: [PATCH 1/4] /api/v1/study/1/samples accepts new categories calling PATCH on /api/v1/study/1/samples now silently accepts metadata with new categories in them when updating a sample or adding a new one. Existing samples will be assigned a null value while samples in the metadata will be assigned the value or empty string if one was not given. --- qiita_pet/handlers/rest/study_samples.py | 19 +-- qiita_pet/test/rest/test_study_samples.py | 195 +++++++++++++++++++++- 2 files changed, 199 insertions(+), 15 deletions(-) diff --git a/qiita_pet/handlers/rest/study_samples.py b/qiita_pet/handlers/rest/study_samples.py index 5a4804088..e9fee5a91 100644 --- a/qiita_pet/handlers/rest/study_samples.py +++ b/qiita_pet/handlers/rest/study_samples.py @@ -172,12 +172,7 @@ def patch(self, study_id): if set(data.columns).issubset(categories): self.fail('Not all sample information categories provided', 400) - else: - unknown = set(data.columns) - categories - self.fail("Some categories do not exist in the sample " - "information", 400, - categories_not_found=sorted(unknown)) - return + return existing_samples = set(sample_info.index) overlapping_ids = set(data.index).intersection(existing_samples) @@ -186,16 +181,20 @@ def patch(self, study_id): # warnings generated are not currently caught # see https://github.com/biocore/qiita/issues/2096 - if overlapping_ids: - to_update = data.loc[overlapping_ids] - study.sample_template.update(to_update) - status = 200 + # processing new_ids first allows us to extend the sample_template + # w/new columns before calling update(). update() will return an + # error if unexpected columns are found. if new_ids: to_extend = data.loc[new_ids] study.sample_template.extend(to_extend) status = 201 + if overlapping_ids: + to_update = data.loc[overlapping_ids] + study.sample_template.update(to_update) + status = 200 + self.set_status(status) self.finish() diff --git a/qiita_pet/test/rest/test_study_samples.py b/qiita_pet/test/rest/test_study_samples.py index b2c3e9a72..c25e32b7b 100644 --- a/qiita_pet/test/rest/test_study_samples.py +++ b/qiita_pet/test/rest/test_study_samples.py @@ -5,7 +5,7 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- - +import json from unittest import main from datetime import datetime @@ -33,6 +33,120 @@ def _sample_creator(ids): class StudySamplesHandlerTests(RESTHandlerTestCase): + def test_patch_accept_new_categories(self): + body = {'1.SKM1.999998': {'dna_extracted': 'foo', + 'host_taxid': 'foo', + 'altitude': 'foo', + 'description_duplicate': 'foo', + 'temp': 'foo', + 'country': 'foo', + 'texture': 'foo', + 'latitude': '32.7157', + 'assigned_from_geo': 'foo', + 'tot_org_carb': 'foo', + 'env_feature': 'foo', + 'depth': 'foo', + 'tot_nitro': 'foo', + 'anonymized_name': 'foo', + 'scientific_name': 'foo', + 'samp_salinity': 'foo', + 'ph': 'foo', + 'taxon_id': '9999', + 'season_environment': 'foo', + 'physical_specimen_remaining': 'foo', + 'host_subject_id': 'foo', + 'water_content_soil': 'foo', + 'env_biome': 'foo', + 'env_package': 'foo', + 'elevation': 'foo', + 'collection_timestamp': ('2014-05-29 ' + '12:24:15'), + 'sample_type': 'foo', + 'physical_specimen_location': 'foo', + 'longitude': '117.1611', + 'common_name': 'foo', + 'description': 'foo'}} + + # first, confirm this should patch successfully: all fields present + # note that response is 201 if using patch to add new samples, 200 if + # updating existing samples. + response = self.patch('/api/v1/study/1/samples', headers=self.headers, + data=body, asjson=True) + self.assertEqual(response.code, 201) + + body = {'1.SKM1.999999': {'dna_extracted': 'foo', + 'host_taxid': 'foo', + 'altitude': 'foo', + 'description_duplicate': 'foo', + 'temp': 'foo', + 'country': 'foo', + 'texture': 'foo', + 'latitude': '32.7157', + 'assigned_from_geo': 'foo', + 'tot_org_carb': 'foo', + 'env_feature': 'foo', + 'depth': 'foo', + 'tot_nitro': 'foo', + 'anonymized_name': 'foo', + 'scientific_name': 'foo', + 'samp_salinity': 'foo', + 'ph': 'foo', + 'taxon_id': '9999', + 'season_environment': 'foo', + 'physical_specimen_remaining': 'foo', + 'host_subject_id': 'foo', + 'water_content_soil': 'foo', + 'env_biome': 'foo', + 'env_package': 'foo', + 'elevation': 'foo', + 'collection_timestamp': ('2014-05-29 ' + '12:24:15'), + 'sample_type': 'foo', + 'physical_specimen_location': 'foo', + 'longitude': '117.1611', + 'common_name': 'foo', + 'description': 'foo'}} + + # add a new field to one sample_id, making body a superset of values. + body['1.SKM1.999999']['new_field1'] = 'some_value' + body['1.SKM1.999999']['new_field2'] = 'another_value' + + # this test should pass. + response = self.patch('/api/v1/study/1/samples', headers=self.headers, + data=body, asjson=True) + self.assertEqual(response.code, 201) + + # confirm new samples were added. + response = self.get('/api/v1/study/1/samples', headers=self.headers) + self.assertEqual(response.code, 200) + obs = json_decode(response.body) + self.assertIn('1.SKM1.999998', obs) + self.assertIn('1.SKM1.999999', obs) + + # confirm new categories are a part of the samples. + response = self.get('/api/v1/study/1/samples/info', + headers=self.headers) + self.assertEqual(response.code, 200) + obs = json_decode(response.body) + self.assertIn('new_field1', obs['categories']) + self.assertIn('new_field2', obs['categories']) + + # TODO: need a test to get metadata for 1.SKM1.999998 and 1.SKM1.999999 + # and confirm new_field1 and new_field2 are empty for the former and + # filled for the latter. + + # remove a few existing fields, representing retired fields. + for sample_id in body: + del (body[sample_id]['ph']) + del (body[sample_id]['water_content_soil']) + + exp = {'message': 'Not all sample information categories provided'} + response = self.patch('/api/v1/study/1/samples', headers=self.headers, + data=body, asjson=True) + self.assertEqual(response.code, 400) + obs = json_decode(response.body) + self.assertEqual(obs, exp) + def test_patch_no_study(self): body = {'sampleid1': {'category_a': 'value_a'}, 'sampleid2': {'category_b': 'value_b'}} @@ -91,13 +205,84 @@ def test_patch_sample_ids_exist_incomplete_metadata(self): self.assertEqual(obs, exp) def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): + response = self.get('/api/v1/study/1/samples', headers=self.headers) + self.assertEqual(response.code, 200) + + # If no new categories, both new and existing samples should succeed. + # 640201 is an existing sample. blank.a1 is a new sample body = _sample_creator(['1.SKM8.640201', 'blank.a1']) + response = self.patch('/api/v1/study/1/samples', headers=self.headers, + data=body, asjson=True) + self.assertEqual(response.code, 200) + # successful response should be empty string + self.assertEqual(response.body, b'') + + # If new categories are added, patch() should succeed. + # New and existing samples should have new categories. + # 640201 is an existing sample. blank.a2 is a new sample + body = _sample_creator(['1.SKM8.640201', 'blank.a2']) + # body['blank.a2']['DOES_NOT_EXIST'] will be '', not None. + # body['1.SKM8.640201']['WHAT'] will be '', not None. body['1.SKM8.640201']['DOES_NOT_EXIST'] = 'foo' - body['blank.a1']['WHAT'] = 'bar' + body['blank.a2']['WHAT'] = 'bar' + + response = self.patch('/api/v1/study/1/samples', headers=self.headers, + data=body, asjson=True) + self.assertEqual(response.code, 200) + # successful response should be empty string + self.assertEqual(response.body, b'') + + response = self.get(('/api/v1/study/1/samples/categories=' + 'does_not_exist,what'), headers=self.headers) + self.assertEqual(response.code, 200) - exp = {'message': "Some categories do not exist in the sample " - "information", - 'categories_not_found': ['DOES_NOT_EXIST', 'WHAT']} + # decode results manually from bytes, replacing non-JSON-spec 'NaN' + # values with JSON-spec 'null'. These will convert to Python None + # values when load()ed. + obs = response.body.decode("utf-8").replace('NaN', 'null') + obs = json.loads(obs) + + exp = {'header': ['does_not_exist', 'what'], + 'samples': {'1.SKM7.640188': [None, None], + '1.SKD9.640182': [None, None], + '1.SKB8.640193': [None, None], + '1.SKD2.640178': [None, None], + '1.SKM3.640197': [None, None], + '1.SKM4.640180': [None, None], + '1.SKB9.640200': [None, None], + '1.SKB4.640189': [None, None], + '1.SKB5.640181': [None, None], + '1.SKB6.640176': [None, None], + '1.SKM2.640199': [None, None], + '1.SKM5.640177': [None, None], + '1.SKB1.640202': [None, None], + '1.SKD8.640184': [None, None], + '1.SKD4.640185': [None, None], + '1.SKB3.640195': [None, None], + '1.SKM1.640183': [None, None], + '1.SKB7.640196': [None, None], + '1.SKD3.640198': [None, None], + '1.SKD7.640191': [None, None], + '1.SKD6.640190': [None, None], + '1.SKB2.640194': [None, None], + '1.SKM9.640192': [None, None], + '1.SKM6.640187': [None, None], + '1.SKD5.640186': [None, None], + '1.SKD1.640179': [None, None], + '1.blank.a1': [None, None], + '1.blank.a2': ['', 'bar'], + '1.SKM8.640201': ['foo', '']}} + + self.assertDictEqual(obs, exp) + + # If categories were removed, both existing and new samples should + # fail. + # 640201 is an existing sample. blank.a3 is a new sample + body = _sample_creator(['1.SKM8.640201', 'blank.a3']) + del (body['1.SKM8.640201']['env_biome']) + del (body['blank.a3']['env_biome']) + + exp = {'message': 'Not all sample information categories provided'} response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) self.assertEqual(response.code, 400) From 27cf67f0621536a7acc3d7ffc112a1d9c6f09070 Mon Sep 17 00:00:00 2001 From: Charles Cowart Date: Fri, 20 Jan 2023 22:23:58 -0800 Subject: [PATCH 2/4] Fixed status code When a PATCH includes new samples as well as existing ones, 201 should be returned on success, rather than 200. --- qiita_pet/handlers/rest/study_samples.py | 4 +++- qiita_pet/test/rest/test_study_samples.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/qiita_pet/handlers/rest/study_samples.py b/qiita_pet/handlers/rest/study_samples.py index e9fee5a91..100cbafac 100644 --- a/qiita_pet/handlers/rest/study_samples.py +++ b/qiita_pet/handlers/rest/study_samples.py @@ -193,7 +193,9 @@ def patch(self, study_id): if overlapping_ids: to_update = data.loc[overlapping_ids] study.sample_template.update(to_update) - status = 200 + if status == 500: + # don't overwrite a possible status = 201 + status = 200 self.set_status(status) self.finish() diff --git a/qiita_pet/test/rest/test_study_samples.py b/qiita_pet/test/rest/test_study_samples.py index c25e32b7b..88c77d919 100644 --- a/qiita_pet/test/rest/test_study_samples.py +++ b/qiita_pet/test/rest/test_study_samples.py @@ -213,7 +213,7 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): body = _sample_creator(['1.SKM8.640201', 'blank.a1']) response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) - self.assertEqual(response.code, 200) + self.assertEqual(response.code, 201) # successful response should be empty string self.assertEqual(response.body, b'') @@ -228,7 +228,7 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) - self.assertEqual(response.code, 200) + self.assertEqual(response.code, 201) # successful response should be empty string self.assertEqual(response.body, b'') From f34f85657f887357602370751556b08111ee290b Mon Sep 17 00:00:00 2001 From: Charles Cowart Date: Mon, 23 Jan 2023 16:18:39 -0800 Subject: [PATCH 3/4] Fixed tests Tests ran fine individually, but failed when ran as a whole suite because categories and samples added for one test would affect tests downstream. --- qiita_pet/handlers/rest/study_samples.py | 10 +- qiita_pet/test/rest/test_study_samples.py | 107 ++++++++++------------ 2 files changed, 54 insertions(+), 63 deletions(-) diff --git a/qiita_pet/handlers/rest/study_samples.py b/qiita_pet/handlers/rest/study_samples.py index 100cbafac..3919d3df9 100644 --- a/qiita_pet/handlers/rest/study_samples.py +++ b/qiita_pet/handlers/rest/study_samples.py @@ -168,11 +168,11 @@ def patch(self, study_id): categories = set(study.sample_template.categories) - if set(data.columns) != categories: - if set(data.columns).issubset(categories): - self.fail('Not all sample information categories provided', - 400) - return + if set(data.columns) != categories and set(data.columns).issubset( + categories): + self.fail('Not all sample information categories provided', + 400) + return existing_samples = set(sample_info.index) overlapping_ids = set(data.index).intersection(existing_samples) diff --git a/qiita_pet/test/rest/test_study_samples.py b/qiita_pet/test/rest/test_study_samples.py index 88c77d919..6d246bd6c 100644 --- a/qiita_pet/test/rest/test_study_samples.py +++ b/qiita_pet/test/rest/test_study_samples.py @@ -16,23 +16,31 @@ from qiita_pet.test.rest.test_base import RESTHandlerTestCase -def _sample_creator(ids): - categories = ['season_environment', 'env_package', - 'assigned_from_geo', 'texture', 'taxon_id', - 'depth', 'host_taxid', 'common_name', - 'water_content_soil', 'elevation', 'temp', - 'tot_nitro', 'samp_salinity', 'altitude', - 'env_biome', 'country', 'ph', 'anonymized_name', - 'tot_org_carb', 'description_duplicate', - 'env_feature', 'physical_specimen_location', - 'physical_specimen_remaining', 'dna_extracted', - 'sample_type', 'collection_timestamp', - 'host_subject_id', 'description', - 'latitude', 'longitude', 'scientific_name'] +def _sample_creator(ids, categories=None): + if categories is None: + categories = ['season_environment', 'env_package', + 'assigned_from_geo', 'texture', 'taxon_id', + 'depth', 'host_taxid', 'common_name', + 'water_content_soil', 'elevation', 'temp', + 'tot_nitro', 'samp_salinity', 'altitude', + 'env_biome', 'country', 'ph', 'anonymized_name', + 'tot_org_carb', 'description_duplicate', + 'env_feature', 'physical_specimen_location', + 'physical_specimen_remaining', 'dna_extracted', + 'sample_type', 'collection_timestamp', + 'host_subject_id', 'description', + 'latitude', 'longitude', 'scientific_name'] return {i: {c: 1 for c in categories} for i in ids} class StudySamplesHandlerTests(RESTHandlerTestCase): + def _get_sample_categories(self, study_id): + response = self.get('/api/v1/study/1/samples/info', + headers=self.headers) + self.assertEqual(response.code, 200) + obs = json_decode(response.body) + return obs['categories'] + def test_patch_accept_new_categories(self): body = {'1.SKM1.999998': {'dna_extracted': 'foo', 'host_taxid': 'foo', @@ -131,10 +139,6 @@ def test_patch_accept_new_categories(self): self.assertIn('new_field1', obs['categories']) self.assertIn('new_field2', obs['categories']) - # TODO: need a test to get metadata for 1.SKM1.999998 and 1.SKM1.999999 - # and confirm new_field1 and new_field2 are empty for the former and - # filled for the latter. - # remove a few existing fields, representing retired fields. for sample_id in body: del (body[sample_id]['ph']) @@ -205,12 +209,11 @@ def test_patch_sample_ids_exist_incomplete_metadata(self): self.assertEqual(obs, exp) def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): - response = self.get('/api/v1/study/1/samples', headers=self.headers) - self.assertEqual(response.code, 200) - + current = self._get_sample_categories(1) # If no new categories, both new and existing samples should succeed. # 640201 is an existing sample. blank.a1 is a new sample - body = _sample_creator(['1.SKM8.640201', 'blank.a1']) + body = _sample_creator(['1.SKM8.640201', + 'blank.a1'], categories=current) response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) self.assertEqual(response.code, 201) @@ -220,7 +223,8 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): # If new categories are added, patch() should succeed. # New and existing samples should have new categories. # 640201 is an existing sample. blank.a2 is a new sample - body = _sample_creator(['1.SKM8.640201', 'blank.a2']) + body = _sample_creator(['1.SKM8.640201', + 'blank.a2'], categories=current) # body['blank.a2']['DOES_NOT_EXIST'] will be '', not None. # body['1.SKM8.640201']['WHAT'] will be '', not None. body['1.SKM8.640201']['DOES_NOT_EXIST'] = 'foo' @@ -242,43 +246,24 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): obs = response.body.decode("utf-8").replace('NaN', 'null') obs = json.loads(obs) - exp = {'header': ['does_not_exist', 'what'], - 'samples': {'1.SKM7.640188': [None, None], - '1.SKD9.640182': [None, None], - '1.SKB8.640193': [None, None], - '1.SKD2.640178': [None, None], - '1.SKM3.640197': [None, None], - '1.SKM4.640180': [None, None], - '1.SKB9.640200': [None, None], - '1.SKB4.640189': [None, None], - '1.SKB5.640181': [None, None], - '1.SKB6.640176': [None, None], - '1.SKM2.640199': [None, None], - '1.SKM5.640177': [None, None], - '1.SKB1.640202': [None, None], - '1.SKD8.640184': [None, None], - '1.SKD4.640185': [None, None], - '1.SKB3.640195': [None, None], - '1.SKM1.640183': [None, None], - '1.SKB7.640196': [None, None], - '1.SKD3.640198': [None, None], - '1.SKD7.640191': [None, None], - '1.SKD6.640190': [None, None], - '1.SKB2.640194': [None, None], - '1.SKM9.640192': [None, None], - '1.SKM6.640187': [None, None], - '1.SKD5.640186': [None, None], - '1.SKD1.640179': [None, None], - '1.blank.a1': [None, None], - '1.blank.a2': ['', 'bar'], - '1.SKM8.640201': ['foo', '']}} - - self.assertDictEqual(obs, exp) + self.assertEqual(obs['header'], ['does_not_exist', 'what']) + + self.assertEqual(obs['samples']['1.blank.a2'], ['', 'bar']) + self.assertEqual(obs['samples']['1.SKM8.640201'], ['foo', '']) + + # as the number and names of samples is dynamic, simply confirm the + # other samples are unchanged. + for sample in obs['samples']: + if sample not in ['1.blank.a2', '1.SKM8.640201']: + print(sample) + self.assertEqual(obs['samples'][sample], [None, None]) # If categories were removed, both existing and new samples should # fail. # 640201 is an existing sample. blank.a3 is a new sample - body = _sample_creator(['1.SKM8.640201', 'blank.a3']) + current = self._get_sample_categories(1) + body = _sample_creator(['1.SKM8.640201', + 'blank.a3'], categories=current) del (body['1.SKM8.640201']['env_biome']) del (body['blank.a3']['env_biome']) @@ -290,7 +275,9 @@ def test_patch_sample_ids_complete_metadata_and_unknown_metadata(self): self.assertEqual(obs, exp) def test_patch_sample_ids_already_exist(self): - body = _sample_creator(['1.SKM8.640201', '1.SKM3.640197']) + current = self._get_sample_categories(1) + body = _sample_creator(['1.SKM8.640201', + '1.SKM3.640197'], categories=current) response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) self.assertEqual(response.code, 200) @@ -302,7 +289,8 @@ def test_patch_sample_ids_already_exist(self): self.assertNotEqual(df.loc['1.SKM4.640180']['elevation'], '1') def test_patch_sample_ids_do_not_exist(self): - body = _sample_creator(['blank.a1']) + current = self._get_sample_categories(1) + body = _sample_creator(['blank.a1'], categories=current) response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) self.assertEqual(response.code, 201) @@ -311,7 +299,10 @@ def test_patch_sample_ids_do_not_exist(self): self.assertEqual(df.loc['1.blank.a1']['elevation'], '1') def test_patch_sample_ids_partially_exist(self): - body = _sample_creator(['blank.b1', '1.SKM5.640177', '1.SKB9.640200']) + current = self._get_sample_categories(1) + body = _sample_creator(['blank.b1', + '1.SKM5.640177', + '1.SKB9.640200'], categories=current) response = self.patch('/api/v1/study/1/samples', headers=self.headers, data=body, asjson=True) self.assertEqual(response.code, 201) From beea417b11841ac911281cf9d6342c0dd5e33d9b Mon Sep 17 00:00:00 2001 From: Charles Cowart Date: Mon, 23 Jan 2023 20:25:25 -0800 Subject: [PATCH 4/4] Remove indeterminate state causing condition from REST call. --- qiita_pet/handlers/rest/study_samples.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qiita_pet/handlers/rest/study_samples.py b/qiita_pet/handlers/rest/study_samples.py index 3919d3df9..e5d9c1049 100644 --- a/qiita_pet/handlers/rest/study_samples.py +++ b/qiita_pet/handlers/rest/study_samples.py @@ -168,8 +168,10 @@ def patch(self, study_id): categories = set(study.sample_template.categories) - if set(data.columns) != categories and set(data.columns).issubset( - categories): + # issuperset() will return True for true supersets or exact matches. + # In either case, keep processing. Subsets of categories remain + # invalid, however. + if not set(data.columns).issuperset(categories): self.fail('Not all sample information categories provided', 400) return