7
7
from oaipmh .error import IdDoesNotExistError
8
8
from oaipmh .error import BadArgumentError
9
9
10
- from metax_api .models .catalog_record import CatalogRecord
10
+ from metax_api .models .catalog_record import CatalogRecord , DataCatalog
11
11
from metax_api .services import CatalogRecordService as CRS
12
12
13
# URL template for SYKE records; the single %s placeholder is filled with the
# record's other_identifier notation by _handle_syke_urnresolver_metadata.
+ syke_url_prefix_template = 'http://metatieto.ymparisto.fi:8080/geoportal/catalog/search/resource/details.page?uuid=%s'
14
+
13
15
14
16
class MetaxOAIServer (ResumptionOAIPMH ):
15
17
16
18
def _is_valid_set (self , set ):
17
- if not set or set == 'urnresolver' or set in settings .OAI ['SET_MAPPINGS' ]:
19
+ if not set or set in [ 'urnresolver' , 'datacatalogs' ] or set in settings .OAI ['SET_MAPPINGS' ]:
18
20
return True
19
21
return False
20
22
@@ -30,16 +32,20 @@ def _get_filtered_records(self, set, cursor, batch_size, from_=None, until=None)
30
32
if not self ._is_valid_set (set ):
31
33
raise BadArgumentError ('invalid set value' )
32
34
33
- query_set = CatalogRecord .objects .all ()
35
+ proxy = CatalogRecord
36
+ if set == 'datacatalogs' :
37
+ proxy = DataCatalog
38
+
39
+ query_set = proxy .objects .all ()
34
40
if from_ and until :
35
- query_set = CatalogRecord .objects .filter (date_modified__gte = from_ , date_modified__lte = until )
41
+ query_set = proxy .objects .filter (date_modified__gte = from_ , date_modified__lte = until )
36
42
elif from_ :
37
- query_set = CatalogRecord .objects .filter (date_modified__gte = from_ )
43
+ query_set = proxy .objects .filter (date_modified__gte = from_ )
38
44
elif until :
39
- query_set = CatalogRecord .objects .filter (date_modified__lte = until )
45
+ query_set = proxy .objects .filter (date_modified__lte = until )
40
46
41
47
if set :
42
- if set == 'urnresolver' :
48
+ if set in [ 'urnresolver' , 'datacatalogs' ] :
43
49
pass
44
50
else :
45
51
query_set = query_set .filter (
@@ -48,38 +54,154 @@ def _get_filtered_records(self, set, cursor, batch_size, from_=None, until=None)
48
54
query_set = query_set .filter (data_catalog__catalog_json__identifier__in = self ._get_default_set_filter ())
49
55
return query_set [cursor :batch_size ]
50
56
57
+ def _handle_syke_urnresolver_metadata (self , record ):
58
+ identifiers = []
59
+ preferred_identifier = record .research_dataset .get ('preferred_identifier' )
60
+ identifiers .append (preferred_identifier )
61
+ for id_obj in record .research_dataset .get ('other_identifier' , []):
62
+ if id_obj .get ('notation' , '' ).startswith ('{' ):
63
+ uuid = id_obj ['notation' ]
64
+ identifiers .append (syke_url_prefix_template % uuid )
65
+ return identifiers
66
+
51
67
def _get_oai_dc_urnresolver_metadata (self , record ):
52
68
"""
53
69
Preferred identifier is added only for ida and att catalog records
54
70
other identifiers are added for all.
71
+
72
+ Special handling for SYKE catalog.
55
73
"""
74
+
56
75
identifiers = []
57
- identifiers .append (settings .OAI ['ETSIN_URL_TEMPLATE' ] % record .identifier )
58
76
59
- # assuming ida and att catalogs are not harvested
60
- if not record .catalog_is_harvested ():
61
- preferred_identifier = record .research_dataset .get ('preferred_identifier' )
62
- identifiers .append (preferred_identifier )
63
- for id_obj in record .research_dataset .get ('other_identifier' , []):
64
- if id_obj .get ('notation' , '' ).startswith ('urn:nbn:fi:csc-kata' ):
65
- other_urn = id_obj ['notation' ]
66
- identifiers .append (other_urn )
77
+ data_catalog = record .data_catalog .catalog_json .get ('identifier' )
78
+ if data_catalog == 'urn:nbn:fi:att:data-catalog-harvest-syke' :
79
+ identifiers = self ._handle_syke_urnresolver_metadata (record )
80
+
81
+ else :
82
+ identifiers .append (settings .OAI ['ETSIN_URL_TEMPLATE' ] % record .identifier )
83
+
84
+ # assuming ida and att catalogs are not harvested
85
+ if not record .catalog_is_harvested ():
86
+ preferred_identifier = record .research_dataset .get ('preferred_identifier' )
87
+ identifiers .append (preferred_identifier )
88
+ for id_obj in record .research_dataset .get ('other_identifier' , []):
89
+ if id_obj .get ('notation' , '' ).startswith ('urn:nbn:fi:csc-kata' ):
90
+ other_urn = id_obj ['notation' ]
91
+ identifiers .append (other_urn )
67
92
68
93
meta = {
69
94
'identifier' : identifiers
70
95
}
71
96
return meta
72
97
73
- def _get_oai_dc_metadata (self , record ):
74
- identifier = record .research_dataset .get ('preferred_identifier' )
98
+ def _get_oaic_dc_value (self , value , lang = None ):
99
+ valueDict = {}
100
+ valueDict ['value' ] = value
101
+ if lang :
102
+ valueDict ['lang' ] = lang
103
+ return valueDict
104
+
105
+ def _get_oai_dc_metadata (self , record , json , type ):
106
+ identifier = []
107
+ if 'preferred_identifier' in json :
108
+ identifier .append (self ._get_oaic_dc_value (json .get ('preferred_identifier' )))
109
+ if 'identifier' in json :
110
+ identifier .append (self ._get_oaic_dc_value (json .get ('identifier' )))
111
+
112
+ title = []
113
+ title_data = json .get ('title' , {})
114
+ for key , value in title_data .items ():
115
+ title .append (self ._get_oaic_dc_value (value , key ))
116
+
117
+ creator = []
118
+ creator_data = json .get ('creator' , [])
119
+ for value in creator_data :
120
+ if 'name' in value :
121
+ creator .append (self ._get_oaic_dc_value (value .get ('name' )))
122
+
123
+ subject = []
124
+ subject_data = json .get ('keyword' , [])
125
+ for value in subject_data :
126
+ subject .append (self ._get_oaic_dc_value (value ))
127
+ subject_data = json .get ('field_of_science' , [])
128
+ for value in subject_data :
129
+ for key , value2 in value .get ('pref_label' , {}).items ():
130
+ subject .append (self ._get_oaic_dc_value (value2 , key ))
131
+ subject_data = json .get ('theme' , [])
132
+ for value in subject_data :
133
+ for key , value2 in value .get ('pref_label' , {}).items ():
134
+ subject .append (self ._get_oaic_dc_value (value2 , key ))
135
+
136
+ desc = []
137
+ desc_data = json .get ('description' , {}).get ('name' , {})
138
+ for key , value in desc_data .items ():
139
+ desc .append (self ._get_oaic_dc_value (value , key ))
140
+
141
+ publisher = []
142
+ publisher_data = json .get ('publisher' , {})
143
+ for key , value in publisher_data .get ('name' , {}).items ():
144
+ publisher .append (self ._get_oaic_dc_value (value , key ))
145
+
146
+ contributor = []
147
+ contributor_data = json .get ('contributor' , [])
148
+ for value in contributor_data :
149
+ if 'name' in value :
150
+ contributor .append (self ._get_oaic_dc_value (value .get ('name' )))
151
+
152
+ date = self ._get_oaic_dc_value (str (record .date_created ))
153
+
154
+ language = []
155
+ language_data = json .get ('language' , [])
156
+ for value in language_data :
157
+ if 'identifier' in value :
158
+ language .append (self ._get_oaic_dc_value (value ['identifier' ]))
159
+
160
+ relation = []
161
+ relation_data = json .get ('relation' , [])
162
+ for value in relation_data :
163
+ if 'identifier' in value .get ('entity' , {}):
164
+ relation .append (self ._get_oaic_dc_value (value ['entity' ]['identifier' ]))
165
+
166
+ coverage = []
167
+ coverage_data = json .get ('spatial' , [])
168
+ for value in coverage_data :
169
+ if 'geographic_name' in value :
170
+ coverage .append (self ._get_oaic_dc_value (value ['geographic_name' ]))
171
+
172
+ rights = []
173
+ rights_data = json .get ('access_rights' , {})
174
+ rights_desc = rights_data .get ('description' , {}).get ('name' , {})
175
+ for key , value in rights_desc .items ():
176
+ rights .append (self ._get_oaic_dc_value (value , key ))
177
+
178
+ for value in rights_data .get ('license' , []):
179
+ if 'identifier' in value :
180
+ rights .append (self ._get_oaic_dc_value (value ['identifier' ]))
181
+
182
+ types = []
183
+ types .append (self ._get_oaic_dc_value (type ))
184
+
75
185
meta = {
76
- 'identifier' : [identifier ]
186
+ 'identifier' : identifier ,
187
+ 'title' : title ,
188
+ 'creator' : creator ,
189
+ 'subject' : subject ,
190
+ 'description' : desc ,
191
+ 'publisher' : publisher ,
192
+ 'contributor' : contributor ,
193
+ 'date' : [date ],
194
+ 'type' : types ,
195
+ 'language' : language ,
196
+ 'relation' : relation ,
197
+ 'coverage' : coverage ,
198
+ 'rights' : rights
77
199
}
78
200
return meta
79
201
80
- def _get_oai_datacite_metadata (self , record ):
202
+ def _get_oai_datacite_metadata (self , json ):
81
203
datacite_xml = CRS .transform_datasets_to_format (
82
- {'research_dataset' : record . research_dataset }, 'datacite' , False
204
+ {'research_dataset' : json }, 'datacite' , False
83
205
)
84
206
meta = {
85
207
'datacentreSymbol' : 'Metax' ,
@@ -88,13 +210,20 @@ def _get_oai_datacite_metadata(self, record):
88
210
}
89
211
return meta
90
212
91
def _get_metadata_for_record(self, record, json, type, metadata_prefix):
    """Produce the metadata dict for one record in the requested format.

    :param record: the model instance being served
    :param json: the record's metadata dict
    :param type: 'Dataset' or 'Datacatalog'
    :param metadata_prefix: 'oai_dc', 'oai_datacite' or 'oai_dc_urnresolver'
    :return: the result of self._fix_metadata applied to the format-specific
             dict (an empty dict for any other prefix)
    :raises BadArgumentError: when a data catalog is requested in a format
             other than oai_dc
    """
    if type == 'Datacatalog' and metadata_prefix != 'oai_dc':
        raise BadArgumentError('Invalid set value. DataCatalogs can only be harvested using oai_dc format.')

    stripped = CRS.strip_catalog_record(json)

    if metadata_prefix == 'oai_dc':
        result = self._get_oai_dc_metadata(record, stripped, type)
    elif metadata_prefix == 'oai_datacite':
        result = self._get_oai_datacite_metadata(stripped)
    elif metadata_prefix == 'oai_dc_urnresolver':
        # This is a special case. Only identifier values are retrieved from the record,
        # so the stripped json is not passed on here.
        result = self._get_oai_dc_urnresolver_metadata(record)
    else:
        result = {}
    return self._fix_metadata(result)
100
229
@@ -106,9 +235,14 @@ def _get_header_timestamp(self, record):
106
235
timestamp = record .date_created
107
236
return timezone .make_naive (timestamp )
108
237
109
def _get_oai_item(self, identifier, record, metadata_prefix):
    """Build the OAI item for a dataset record.

    :param identifier: identifier placed in the item header
    :param record: object exposing research_dataset
    :param metadata_prefix: requested metadata format
    :return: 3-tuple of (common.Header, common.Metadata, None)
    """
    metadata = self._get_metadata_for_record(
        record, record.research_dataset, 'Dataset', metadata_prefix
    )
    header = common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False)
    return (header, common.Metadata('', metadata), None)
243
+
244
def _get_oai_catalog_item(self, identifier, record, metadata_prefix):
    """Build the OAI item for a data catalog.

    :param identifier: identifier placed in the item header
    :param record: object exposing catalog_json
    :param metadata_prefix: requested metadata format
    :return: 3-tuple of (common.Header, common.Metadata, None)
    """
    metadata = self._get_metadata_for_record(
        record, record.catalog_json, 'Datacatalog', metadata_prefix
    )
    header = common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False)
    return (header, common.Metadata('', metadata), None)
@@ -161,18 +295,24 @@ def listMetadataFormats(self, identifier=None):
161
295
162
296
def listSets(self, cursor=None, batch_size=None):
    """Implement OAI-PMH verb ListSets.

    Returns the fixed 'datacatalogs' entry followed by one 3-tuple
    (key, key, '') for every key of settings.OAI['SET_MAPPINGS'].
    The cursor and batch_size arguments are not used.
    """
    configured = [(name, name, '') for name in settings.OAI['SET_MAPPINGS'].keys()]
    return [('datacatalogs', 'datacatalog', '')] + configured
168
302
303
+ def _get_record_identifier (self , record , set ):
304
+ if set == 'datacatalogs' :
305
+ return record .catalog_json ['identifier' ]
306
+ else :
307
+ return record .identifier
308
+
169
309
def listIdentifiers(self, metadataPrefix=None, set=None, cursor=None,
                    from_=None, until=None, batch_size=None):
    """Implement OAI-PMH verb listIdentifiers.

    Returns one common.Header per record yielded by _get_filtered_records,
    using _get_record_identifier to pick the identifier for the given set.
    The metadataPrefix argument is not used.
    """
    matches = self._get_filtered_records(set, cursor, batch_size, from_, until)
    return [
        common.Header(
            '',
            self._get_record_identifier(entry, set),
            self._get_header_timestamp(entry),
            ['metax'],
            False,
        )
        for entry in matches
    ]
178
318
@@ -182,18 +322,27 @@ def listRecords(self, metadataPrefix=None, set=None, cursor=None, from_=None,
182
322
data = []
183
323
records = self ._get_filtered_records (set , cursor , batch_size , from_ , until )
184
324
for record in records :
185
- data .append (self ._get_oai_item (record , metadataPrefix ))
325
+ identifier = self ._get_record_identifier (record , set )
326
+ if set == 'datacatalogs' :
327
+ data .append (self ._get_oai_catalog_item (identifier , record , metadataPrefix ))
328
+ else :
329
+ data .append (self ._get_oai_item (identifier , record , metadataPrefix ))
186
330
return data
187
331
188
332
def getRecord(self, metadataPrefix, identifier):
    """Implement OAI-PMH verb GetRecord.

    The identifier is looked up first among catalog records and, if no
    catalog record matches, among data catalogs (by catalog_json identifier).

    :return: 3-tuple of (common.Header, common.Metadata, None)
    :raises IdDoesNotExistError: when neither lookup finds a match
    """
    try:
        record = CatalogRecord.objects.get(identifier__exact=identifier)
        source_json, record_type = record.research_dataset, 'Dataset'
    except CatalogRecord.DoesNotExist:
        # Not a dataset: fall back to data catalogs before giving up.
        try:
            record = DataCatalog.objects.get(catalog_json__identifier__exact=identifier)
            source_json, record_type = record.catalog_json, 'Datacatalog'
        except DataCatalog.DoesNotExist:
            raise IdDoesNotExistError("No record with id %s available." % identifier)

    metadata = self._get_metadata_for_record(record, source_json, record_type, metadataPrefix)
    header = common.Header('', identifier, self._get_header_timestamp(record), ['metax'], False)
    return (header, common.Metadata('', metadata), None)
0 commit comments