Skip to content
This repository was archived by the owner on Sep 16, 2022. It is now read-only.

Commit e630a33

Browse files
authored
Merge pull request #264 from CSCfi/CSCMETAX-394-remove-sensitive-data-in-oai-pmh
CSCMETAX-394: [ADD] Remove sensitive fields (email, phone, telephone)…
2 parents 3cd96f4 + 68c329a commit e630a33

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

src/metax_api/api/oaipmh/base/metax_oai_server.py

+6
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,12 @@ def _get_oai_datacite_metadata(self, record):
204204

205205
def _get_metadata_for_record(self, record, metadata_prefix):
206206
meta = {}
207+
208+
# strip sensitive fields from research_dataset. note: the modified research_dataset
209+
# is placed back into the record's research_dataset field, meaning an accidental call
210+
# of record.save() would overwrite the original data
211+
record.research_dataset = CRS.strip_catalog_record(record.research_dataset)
212+
207213
if metadata_prefix == 'oai_dc':
208214
meta = self._get_oai_dc_metadata(record)
209215
elif metadata_prefix == 'oai_datacite':

src/metax_api/tests/api/oaipmh/minimal_api.py

+32
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,35 @@ def test_get_oai_dc_metadata(self):
231231
self.assertTrue('identifier' in md)
232232
self.assertTrue('title' in md)
233233
self.assertTrue('lang' in md['title'][0])
234+
235+
def test_sensitive_fields_are_removed(self):
236+
"""
237+
Ensure sensitive fields are never present in the output of the OAI-PMH APIs
238+
"""
239+
sensitive_field_values = [ '[email protected]', '999-123-123', '999-456-456' ]
240+
241+
def _check_fields(content):
242+
"""
243+
Verify that sensitive field values are not in the content. Checks for the field value instead
244+
of the field name, since field names may differ in Datacite and other formats.
245+
"""
246+
for sensitive_field_value in sensitive_field_values:
247+
self.assertEqual(sensitive_field_value not in str(content), True,
248+
'field %s should have been stripped' % sensitive_field_value)
249+
250+
# setup some records to have sensitive fields
251+
for cr in CatalogRecord.objects.filter(pk__in=(1, 2, 3)):
252+
cr.research_dataset['curator'][0].update({
253+
'email': sensitive_field_values[0],
254+
'phone': sensitive_field_values[1],
255+
'telephone': sensitive_field_values[2],
256+
})
257+
cr.force_save()
258+
259+
response = self.client.get('/oai/?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc' % self.identifier)
260+
self.assertEqual(response.status_code, status.HTTP_200_OK)
261+
_check_fields(response.content)
262+
263+
response = self.client.get('/oai/?verb=GetRecord&identifier=%s&metadataPrefix=oai_datacite' % self.identifier)
264+
self.assertEqual(response.status_code, status.HTTP_200_OK)
265+
_check_fields(response.content)

0 commit comments

Comments
 (0)