From b3a0df780605cc396101f3ee4bfd1f0ed51036fd Mon Sep 17 00:00:00 2001 From: markh Date: Wed, 14 Mar 2018 09:41:31 +0000 Subject: [PATCH 1/6] timeout to 11 --- lib/bald/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index e40eae7..c103f37 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -258,11 +258,11 @@ def __getitem__(self, item): # print('trying: {}'.format(item)) headers = {'Accept': 'application/rdf+xml'} - self.cache[item] = requests.get(item, headers=headers, timeout=7) + self.cache[item] = requests.get(item, headers=headers, timeout=11) except Exception: # print('retrying: {}'.format(item)) headers = {'Accept': 'text/html'} - self.cache[item] = requests.get(item, headers=headers, timeout=7) + self.cache[item] = requests.get(item, headers=headers, timeout=11) # print('in {} seconds'.format(time.time() - then)) return self.cache[item] From d450dc0be68e6bbb7ab66984d4d59c653f9680e0 Mon Sep 17 00:00:00 2001 From: markh Date: Wed, 14 Mar 2018 15:44:35 +0000 Subject: [PATCH 2/6] rdf collection partial --- lib/bald/__init__.py | 70 ++++++++++++------- .../integration/CDL/ereefs_gbr4_ncld.cdl | 8 --- .../tests/integration/test_cdl_rdfgraph.py | 38 ++++++++-- 3 files changed, 76 insertions(+), 40 deletions(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index c103f37..4206e20 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -10,6 +10,7 @@ import numpy as np import pyparsing import rdflib +import rdflib.collection import requests import six @@ -524,10 +525,12 @@ def viewgraph(self): def rdfnode(self, graph): selfnode = rdflib.URIRef(self.identity) for attr in self.attrs: + list_items = [] objs = self.attrs[attr] if(isinstance(objs, np.ndarray)): #try to convert np.ndarray to a list objs = objs.tolist() + if not (isinstance(objs, set) or isinstance(objs, list)): objs = set([objs]) for obj in objs: @@ -546,16 +549,24 @@ def rdfnode(self, graph): else: rdfobj = rdflib.Literal(rdfobj) rdfpred = rdflib.URIRef(rdfpred) - try: - graph.add((selfnode, rdfpred, rdfobj)) - - except AssertionError: - - graph.add((selfnode, rdfpred, rdfobj)) + if isinstance(objs, set): + try: + graph.add((selfnode, rdfpred, rdfobj)) + + except AssertionError: + pass + #graph.add((selfnode, rdfpred, rdfobj)) + elif isinstance(objs, list): + list_items.append(rdfobj) if isinstance(obj, Subject): obj_ref = rdflib.URIRef(obj.identity) if (obj_ref, None, None) not in graph: graph = obj.rdfnode(graph) + if list_items: + list_name = rdflib.BNode() + col = rdflib.collection.Collection(graph, list_name, list_items) + + graph.add((selfnode, rdfpred, list_name)) return graph @@ -887,10 +898,18 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None): 'filter(?rtype = owl:Class) ' '}') + qstr = ('prefix bald: ' + 'prefix skos: ' + 'prefix owl: ' + 'select ?s ' + 'where { ' + ' ?s rdfs:range ?type . ' + 'filter(?type in (rdfs:Literal, skos:Concept)) ' + '}') + refs = reference_graph.query(qstr) - ref_prefs = [str(ref[0]) for ref in list(refs)] - + non_ref_prefs = [str(ref[0]) for ref in list(refs)] # cycle again and find references for name in fhandle.variables: @@ -906,30 +925,29 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None): # for sattr in sattrs: for sattr in (sattr for sattr in sattrs if - root_container.unpack_predicate(sattr) in ref_prefs): - # if sattr == 'coordinates': - # import pdb; pdb.set_trace() + root_container.unpack_predicate(sattr) not in non_ref_prefs): if (isinstance(sattrs[sattr], six.string_types) and file_variables.get(sattrs[sattr])): # next: remove all use of set, everything is dict or orderedDict var.attrs[sattr] = set((file_variables.get(sattrs[sattr]),)) elif isinstance(sattrs[sattr], six.string_types): - potrefs_list = sattrs[sattr].split(',') - potrefs_set = sattrs[sattr].split(' ') - if len(potrefs_list) > 1: - refs = np.array([file_variables.get(pref) is not None - for pref in potrefs_list]) - if np.all(refs): - var.attrs[sattr] = [file_variables.get(pref) - for pref in potrefs_list] - - elif len(potrefs_set) > 1: - refs = np.array([file_variables.get(pref) is not None - for pref in potrefs_set]) - if np.all(refs): - var.attrs[sattr] = set([file_variables.get(pref) - for pref in potrefs_set]) + if sattrs[sattr].startswith('(') and sattrs[sattr].endswith(')'): + potrefs_list = sattrs[sattr].lstrip('( ').rstrip(' )').split(' ') + if len(potrefs_list) > 1: + refs = np.array([file_variables.get(pref) is not None + for pref in potrefs_list]) + if np.all(refs): + var.attrs[sattr] = [file_variables.get(pref) + for pref in potrefs_list] + else: + potrefs_set = sattrs[sattr].split(' ') + if len(potrefs_set) > 1: + refs = np.array([file_variables.get(pref) is not None + for pref in potrefs_set]) + if np.all(refs): + var.attrs[sattr] = set([file_variables.get(pref) + for pref in potrefs_set]) # coordinate variables are bald__references except for # variables that already declare themselves as bald__Reference diff --git a/lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl b/lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl index 7c1ea9c..6824a3c 100644 --- a/lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl +++ b/lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl @@ -126,19 +126,11 @@ variables: :metadata_link = "http://marlin.csiro.au/geonetwork/srv/eng/search?&uuid=72020224-f086-434a-bbe9-a222c8e5cf0d" ; :rdf__type = "bald__Container" ; :bald__isPrefixedBy = "prefix_list" ; - :bald__isAliasedBy = "alias_list" ; group: prefix_list { // group attributes: :bald__ = "http://binary-array-ld.net/latest/" ; :rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; } // group bald__prefix_list - - group: alias_list { - // group attributes: - :qudt = "http://qudt.org/1.1/schema/qudt"; - :ed_gov_au_op = "http://environment.data.gov.au/def/op" ; - - } // group bald__alias_list } diff --git a/lib/bald/tests/integration/test_cdl_rdfgraph.py b/lib/bald/tests/integration/test_cdl_rdfgraph.py index 734afd9..72fa7ef 100644 --- a/lib/bald/tests/integration/test_cdl_rdfgraph.py +++ b/lib/bald/tests/integration/test_cdl_rdfgraph.py @@ -90,14 +90,40 @@ def test_gems_co2(self): expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) + def test_ProcessChain0300(self): + with self.temp_filename('.nc') as tfile: + name = 'ProcessChain0300' + cdl_file = os.path.join(self.cdl_path, '{}.cdl'.format(name)) + subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) + cdl_file_uri = 'file://CDL/{}.cdl'.format(name) + alias_dict = {'CFTerms': 'http://def.scitools.org.uk/CFTerms', + 'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/' + } + alias_dict = {} + root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, + alias_dict=alias_dict, cache=self.acache) + ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") + # with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: + # sf.write(ttl) + with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'r') as sf: + expected_ttl = sf.read() + self.assertEqual(expected_ttl, ttl) + def test_ereefs(self): with self.temp_filename('.nc') as tfile: - cdl_file = os.path.join(self.cdl_path, 'ereefs_gbr4_ncld.cdl') + name = 'ereefs_gbr4_ncld' + cdl_file = os.path.join(self.cdl_path, '{}.cdl'.format(name)) subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) root_container = bald.load_netcdf(tfile, cache=self.acache) - try: - g = root_container.rdfgraph() - ttl = g.serialize(format='n3').decode("utf-8") - except TypeError: - self.fail("Test case could not convert ereefs CDL to RDF") + # try: + # g = root_container.rdfgraph() + # ttl = g.serialize(format='n3').decode("utf-8") + # except TypeError: + # self.fail("Test case could not convert ereefs CDL to RDF") + ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") + # with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: + # sf.write(ttl) + with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'r') as sf: + expected_ttl = sf.read() + self.assertEqual(expected_ttl, ttl) From c644a46d374c5cd71a76a1a1585a95efe7a745c9 Mon Sep 17 00:00:00 2001 From: markh Date: Thu, 15 Mar 2018 13:23:19 +0000 Subject: [PATCH 3/6] list handling --- lib/bald/__init__.py | 18 +- .../integration/CDL/multi_array_reference.cdl | 2 +- .../integration/TTL/ProcessChain0300.ttl | 30 +++ .../integration/TTL/ereefs_gbr4_ncld.ttl | 229 ++++++++++++++++++ .../integration/TTL/multi_array_reference.ttl | 3 +- .../tests/integration/test_cdl_rdfgraph.py | 46 ++-- 6 files changed, 294 insertions(+), 34 deletions(-) create mode 100644 lib/bald/tests/integration/TTL/ProcessChain0300.ttl create mode 100644 lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 4206e20..0a8857d 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -259,11 +259,11 @@ def __getitem__(self, item): # print('trying: {}'.format(item)) headers = {'Accept': 'application/rdf+xml'} - self.cache[item] = requests.get(item, headers=headers, timeout=11) + self.cache[item] = requests.get(item, headers=headers, timeout=17) except Exception: # print('retrying: {}'.format(item)) headers = {'Accept': 'text/html'} - self.cache[item] = requests.get(item, headers=headers, timeout=11) + self.cache[item] = requests.get(item, headers=headers, timeout=17) # print('in {} seconds'.format(time.time() - then)) return self.cache[item] @@ -774,7 +774,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None): root_container = Container(baseuri, '', attrs, prefixes=prefixes, aliases=aliases, alias_graph=aliasgraph) - root_container.attrs['bald__contains'] = [] + root_container.attrs['bald__contains'] = set() file_variables = {} for name in fhandle.variables: if name == prefix_var_name: @@ -866,7 +866,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None): else: var = Subject(baseuri, name, sattrs, prefixes=prefixes, aliases=aliases, alias_graph=aliasgraph) - root_container.attrs['bald__contains'].append(var) + root_container.attrs['bald__contains'].add(var) file_variables[name] = var @@ -979,7 +979,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None): prefixes=prefixes, aliases=aliases, alias_graph=aliasgraph) - root_container.attrs['bald__contains'].append(ref_node) + root_container.attrs['bald__contains'].add(ref_node) file_variables[name] = ref_node refset.add(ref_node) var.attrs['bald__references'] = refset @@ -1018,7 +1018,7 @@ def validate(root_container, sval=None, cache=None): root_val = bv.ContainerValidation(subject=root_container, httpcache=cache) sval.stored_exceptions += root_val.exceptions() - for subject in root_container.attrs.get('bald__contains', []): + for subject in root_container.attrs.get('bald__contains', set()): if isinstance(subject, Array): array_val = bv.ArrayValidation(subject, httpcache=cache) sval.stored_exceptions += array_val.exceptions() @@ -1078,7 +1078,7 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None, root_container = Container(baseuri, identity, attrs, prefixes=prefixes, aliases=aliases, alias_graph=aliasgraph) - root_container.attrs['bald__contains'] = [] + root_container.attrs['bald__contains'] = set() file_variables = {} # iterate through the datasets and groups @@ -1089,14 +1089,14 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None, if not skip: if isinstance(dataset, h5py._hl.group.Group): new_cont, new_fvars = _hdf_group(dataset, name, baseuri, prefixes, aliases) - root_container.attrs['bald__contains'].append(new_cont) + root_container.attrs['bald__contains'].add(new_cont) file_variables = careful_update(file_variables, new_fvars) #if hasattr(dataset, 'shape'): elif isinstance(dataset, h5py._hl.dataset.Dataset): sattrs = dict(dataset.attrs) sattrs['bald__shape'] = dataset.shape dset = Array(baseuri, name, sattrs, prefixes, aliases, aliasgraph) - root_container.attrs['bald__contains'].append(dset) + root_container.attrs['bald__contains'].add(dset) file_variables[dataset.name] = dset return root_container, file_variables diff --git a/lib/bald/tests/integration/CDL/multi_array_reference.cdl b/lib/bald/tests/integration/CDL/multi_array_reference.cdl index f537270..b2e257e 100644 --- a/lib/bald/tests/integration/CDL/multi_array_reference.cdl +++ b/lib/bald/tests/integration/CDL/multi_array_reference.cdl @@ -36,7 +36,7 @@ variables: set_collection:bald__references = "data_variable1 data_variable2" ; int list_collection ; - list_collection:bald__references = "data_variable1,data_variable2" ; + list_collection:bald__references = "( data_variable1 data_variable2 )" ; // global attributes: diff --git a/lib/bald/tests/integration/TTL/ProcessChain0300.ttl b/lib/bald/tests/integration/TTL/ProcessChain0300.ttl new file mode 100644 index 0000000..75ec4f3 --- /dev/null +++ b/lib/bald/tests/integration/TTL/ProcessChain0300.ttl @@ -0,0 +1,30 @@ +@prefix CFTerms: . +@prefix DA: . +@prefix NWP: . +@prefix StatPP: . +@prefix bald: . +@prefix cf_sname: . +@prefix ns1: . +@prefix rdf: . +@prefix rdfs: . +@prefix xml: . +@prefix xsd: . + + a bald:Container ; + ns1:process_chain "gfsmos_process_chain" ; + bald:contains ns1:gfsmos_process_chain, + ns1:step1, + ns1:step2 ; + bald:isPrefixedBy "prefix_list" . + +ns1:gfsmos_process_chain a bald:Subject ; + ns1:OM_Process ( ns1:step1 ns1:step2 ) . + +ns1:step1 a bald:Subject ; + ns1:LE_ProcessStep ; + ns1:LE_Source . + +ns1:step2 a bald:Subject ; + ns1:LE_ProcessStep ; + ns1:LE_Source . + diff --git a/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl b/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl new file mode 100644 index 0000000..ad384ba --- /dev/null +++ b/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl @@ -0,0 +1,229 @@ +@prefix bald: . +@prefix ns1: . +@prefix rdf: . +@prefix rdfs: . +@prefix xml: . +@prefix xsd: . + + a bald:Container ; + ns1:Conventions "CF-1.0" ; + ns1:Run_ID 1.85 ; + ns1:hasVocab "1" ; + ns1:metadata_link ; + ns1:paramfile "in.prm" ; + ns1:paramhead "GBR 4km resolution grid" ; + ns1:shoc_version "v1.1 rev(5249)" ; + ns1:title "GBR4 Hydro" ; + bald:contains ns1:botz, + ns1:eta, + ns1:eta_time_ref, + ns1:latitude, + ns1:longitude, + ns1:salt, + ns1:salt_time_ref, + ns1:temp, + ns1:temp_time_ref, + ns1:time, + ns1:u, + ns1:u_time_ref, + ns1:v, + ns1:v_time_ref, + ns1:wspeed_u, + ns1:wspeed_u_time_ref, + ns1:wspeed_v, + ns1:wspeed_v_time_ref, + ns1:zc ; + bald:isPrefixedBy "prefix_list" . + +ns1:botz a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude ; + ns1:long_name "Depth of sea-bed" ; + ns1:missing_value -99.0 ; + ns1:outside "9999" ; + ns1:positive "down" ; + ns1:standard_name "depth" ; + ns1:units "metre" ; + bald:shape "(180, 600)" . + +ns1:eta a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time ; + ns1:long_name "Surface elevation" ; + ns1:medium_id ; + ns1:missing_value "1e+35" ; + ns1:positive "up" ; + ns1:scaledQuantityKind_id ; + ns1:standard_name "sea_surface_height_above_sea_level" ; + ns1:substanceOrTaxon_id ; + ns1:unit_id ; + ns1:units "metre" ; + ns1:valid_range ( -10.0 10.0 ) ; + bald:references ns1:eta_time_ref ; + bald:shape "(5, 180, 600)" . + +ns1:salt a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time, + ns1:zc ; + ns1:long_name "Salinity" ; + ns1:medium_id ; + ns1:missing_value "1e+35" ; + ns1:scaledQuantityKind_id ; + ns1:substanceOrTaxon_id ; + ns1:unit_id ; + ns1:units "PSU" ; + ns1:valid_range [ rdf:first 0.0 ; + rdf:rest ( 40.0 ) ] ; + bald:references ns1:salt_time_ref ; + bald:shape "(5, 47, 180, 600)" . + +ns1:temp a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time, + ns1:zc ; + ns1:long_name "Temperature" ; + ns1:medium_id ; + ns1:missing_value "1e+35" ; + ns1:scaledQuantityKind_id ; + ns1:substanceOrTaxon_id ; + ns1:unit_id ; + ns1:units "degrees C" ; + ns1:valid_range [ rdf:first 0.0 ; + rdf:rest ( 40.0 ) ] ; + bald:references ns1:temp_time_ref ; + bald:shape "(5, 47, 180, 600)" . + +ns1:wspeed_u a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time ; + ns1:long_name "eastward_wind" ; + ns1:missing_value "1e+35" ; + ns1:units "ms-1" ; + ns1:valid_range ( -1000.0 1000.0 ) ; + bald:references ns1:wspeed_u_time_ref ; + bald:shape "(5, 180, 600)" . + +ns1:wspeed_v a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time ; + ns1:long_name "northward_wind" ; + ns1:missing_value "1e+35" ; + ns1:units "ms-1" ; + ns1:valid_range ( -1000.0 1000.0 ) ; + bald:references ns1:wspeed_v_time_ref ; + bald:shape "(5, 180, 600)" . + +ns1:eta_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1)" . + +ns1:salt_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1, 1)" . + +ns1:temp_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1, 1)" . + +ns1:u a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time, + ns1:zc ; + ns1:long_name "Eastward current" ; + ns1:medium_id ; + ns1:missing_value "1e+35" ; + ns1:scaledQuantityKind_id ; + ns1:standard_name "eastward_sea_water_velocity" ; + ns1:substanceOrTaxon_id ; + ns1:unit_id ; + ns1:units "ms-1" ; + ns1:valid_range ( -100.0 100.0 ) ; + ns1:vector_components ns1:u, + ns1:v ; + ns1:vector_name "Currents" ; + bald:references ns1:u_time_ref ; + bald:shape "(5, 47, 180, 600)" . + +ns1:v_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1, 1)" . + +ns1:wspeed_u_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1)" . + +ns1:wspeed_v_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1)" . + +ns1:u_time_ref a bald:Reference, + bald:Subject ; + bald:array ns1:time ; + bald:childBroadcast "(5, 1, 1, 1)" . + +ns1:v a bald:Array ; + ns1:coordinates ns1:latitude, + ns1:longitude, + ns1:time, + ns1:zc ; + ns1:long_name "Northward current" ; + ns1:medium_id ; + ns1:missing_value "1e+35" ; + ns1:scaledQuantityKind_id ; + ns1:standard_name "northward_sea_water_velocity" ; + ns1:substanceOrTaxon_id ; + ns1:unit_id ; + ns1:units "ms-1" ; + ns1:valid_range ( -100.0 100.0 ) ; + ns1:vector_components ns1:u_time_ref, + ns1:v ; + ns1:vector_name "Currents" ; + bald:references ns1:v_time_ref ; + bald:shape "(5, 47, 180, 600)" . + +ns1:zc a bald:Array ; + ns1:axis "Z" ; + ns1:coordinate_type "Z" ; + ns1:long_name "Z coordinate" ; + ns1:positive "up" ; + ns1:units "m" ; + bald:shape "(47,)" . + +ns1:latitude a bald:Array ; + ns1:coordinate_type ns1:latitude ; + ns1:long_name "Latitude" ; + ns1:projection "geographic" ; + ns1:standard_name ns1:latitude ; + ns1:units "degrees_north" ; + bald:shape "(180, 600)" . + +ns1:longitude a bald:Array ; + ns1:coordinate_type ns1:longitude ; + ns1:long_name "Longitude" ; + ns1:projection "geographic" ; + ns1:standard_name ns1:longitude ; + ns1:units "degrees_east" ; + bald:shape "(180, 600)" . + +ns1:time a bald:Array, + bald:Reference ; + ns1:coordinate_type ns1:time ; + ns1:long_name "Time" ; + ns1:standard_name ns1:time ; + ns1:units "days since 1990-01-01 00:00:00 +10" ; + bald:array ns1:time ; + bald:shape "(5,)" . + diff --git a/lib/bald/tests/integration/TTL/multi_array_reference.ttl b/lib/bald/tests/integration/TTL/multi_array_reference.ttl index ec47a20..6892d13 100644 --- a/lib/bald/tests/integration/TTL/multi_array_reference.ttl +++ b/lib/bald/tests/integration/TTL/multi_array_reference.ttl @@ -36,8 +36,7 @@ ns1:data_variable2 a bald:Array ; bald:shape "(11, 17)" . ns1:list_collection a bald:Subject ; - bald:references ns1:data_variable1_pdim0_ref, - ns1:data_variable2_pdim0_ref . + bald:references ( ns1:data_variable1_pdim0_ref ns1:data_variable2_pdim0_ref ) . ns1:set_collection a bald:Subject ; bald:references ns1:data_variable1_pdim0_ref, diff --git a/lib/bald/tests/integration/test_cdl_rdfgraph.py b/lib/bald/tests/integration/test_cdl_rdfgraph.py index 72fa7ef..44ced65 100644 --- a/lib/bald/tests/integration/test_cdl_rdfgraph.py +++ b/lib/bald/tests/integration/test_cdl_rdfgraph.py @@ -22,8 +22,9 @@ def test_array_reference(self): cdl_file_uri = 'file://CDL/{}'.format(cdlname) root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, 'array_reference.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'array_reference.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'array_reference.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -34,8 +35,9 @@ def test_array_reference_with_baseuri(self): subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) root_container = bald.load_netcdf(tfile, baseuri='http://example.org/base', cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - #with open(os.path.join(self.ttl_path, 'array_reference_withbase.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'array_reference_withbase.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'array_reference_withbase.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -48,8 +50,9 @@ def test_multi_array_reference(self): cdl_file_uri = 'file://CDL/{}'.format(cdlname) root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -66,8 +69,9 @@ def test_point_template(self): root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, alias_dict=alias_dict, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, 'point_template.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'point_template.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'point_template.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -84,8 +88,9 @@ def test_gems_co2(self): root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, alias_dict=alias_dict, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, 'GEMS_CO2_Apr2006.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'GEMS_CO2_Apr2006.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'GEMS_CO2_Apr2006.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -99,12 +104,12 @@ def test_ProcessChain0300(self): alias_dict = {'CFTerms': 'http://def.scitools.org.uk/CFTerms', 'cf_sname': 'http://vocab.nerc.ac.uk/standard_name/' } - alias_dict = {} root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, alias_dict=alias_dict, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) @@ -114,16 +119,13 @@ def test_ereefs(self): name = 'ereefs_gbr4_ncld' cdl_file = os.path.join(self.cdl_path, '{}.cdl'.format(name)) subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) - root_container = bald.load_netcdf(tfile, cache=self.acache) - # try: - # g = root_container.rdfgraph() - # ttl = g.serialize(format='n3').decode("utf-8") - # except TypeError: - # self.fail("Test case could not convert ereefs CDL to RDF") - + cdl_file_uri = 'file://CDL/{}.cdl'.format(name) + root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, + cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, '{}.ttl'.format(name)), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) From 9a308e101d767174d563c4353b6cb09448f8fb86 Mon Sep 17 00:00:00 2001 From: markh Date: Thu, 15 Mar 2018 13:27:55 +0000 Subject: [PATCH 4/6] test ttl ordering --- .../tests/integration/TTL/point_template.ttl | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/lib/bald/tests/integration/TTL/point_template.ttl b/lib/bald/tests/integration/TTL/point_template.ttl index f121fac..1d3c512 100644 --- a/lib/bald/tests/integration/TTL/point_template.ttl +++ b/lib/bald/tests/integration/TTL/point_template.ttl @@ -78,42 +78,15 @@ ns1:z ; CFTerms:featureType "point" . -ns1:crs a bald:Subject ; - ns1:epsg_code "EPSG:4326" ; - ns1:grid_mapping_name "latitude_longitude" ; - ns1:inverse_flattening 298.257223563 ; - ns1:longitude_of_prime_meridian 0.0 ; - ns1:semi_major_axis 6378137.0 . - -ns1:instrument1 a bald:Subject ; - ns1:accuracy "" ; - ns1:calibration_date "2016-03-25" ; - ns1:comment "serial number and calibration dates are bogus" ; - ns1:make_model "SBE-37" ; - ns1:ncei_name "CTD" ; - ns1:precision "" ; - ns1:serial_number "1859723" ; - CFTerms:long_name "Seabird 37 Microcat" . - -ns1:platform1 a bald:Subject ; - ns1:call_sign "" ; - ns1:comment "Data is not actually collected from this platform, this is an example." ; - ns1:ices_code "" ; - ns1:imo_code "" ; - ns1:ioos_code "urn:ioos:station:NCEI:Mooring1" ; - ns1:ncei_code "FIXED PLATFORM, MOORINGS" ; - ns1:wmo_code "" ; - CFTerms:long_name "cordell bank monitoring station" . - ns1:sal a bald:Array ; ns1:_FillValue -9999.0 ; ns1:comment "These data are bogus!!!!!" ; ns1:coverage_content_type "physicalMeasurement" ; ns1:data_max 33.0 ; ns1:data_min 33.0 ; - ns1:instrument "instrument1" ; + ns1:instrument ns1:instrument1 ; ns1:ncei_name "SALINITY" ; - ns1:platform "platform1" ; + ns1:platform ns1:platform1 ; ns1:references ; ns1:source "numpy.random.rand function." ; ns1:valid_max 100.0 ; @@ -125,7 +98,7 @@ ns1:sal a bald:Array ; ns1:lon, ns1:time, ns1:z ; - CFTerms:grid_mapping "crs" ; + CFTerms:grid_mapping ns1:crs ; CFTerms:long_name "Salinity" ; CFTerms:missing_value -8888.0 ; CFTerms:scale_factor 1.0 ; @@ -138,9 +111,9 @@ ns1:temp a bald:Array ; ns1:coverage_content_type "physicalMeasurement" ; ns1:data_max 13.0 ; ns1:data_min 13.0 ; - ns1:instrument "instrument1" ; + ns1:instrument ns1:instrument1 ; ns1:ncei_name "WATER TEMPERATURE" ; - ns1:platform "platform1" ; + ns1:platform ns1:platform1 ; ns1:references ; ns1:source "numpy.random.rand function." ; ns1:valid_max 100.0 ; @@ -152,13 +125,30 @@ ns1:temp a bald:Array ; ns1:lon, ns1:time, ns1:z ; - CFTerms:grid_mapping "crs" ; + CFTerms:grid_mapping ns1:crs ; CFTerms:long_name "Temperature" ; CFTerms:missing_value -8888.0 ; CFTerms:scale_factor 1.0 ; CFTerms:standard_name ; CFTerms:units "degree_Celsius" . +ns1:crs a bald:Subject ; + ns1:epsg_code "EPSG:4326" ; + ns1:grid_mapping_name "latitude_longitude" ; + ns1:inverse_flattening 298.257223563 ; + ns1:longitude_of_prime_meridian 0.0 ; + ns1:semi_major_axis 6378137.0 . + +ns1:instrument1 a bald:Subject ; + ns1:accuracy "" ; + ns1:calibration_date "2016-03-25" ; + ns1:comment "serial number and calibration dates are bogus" ; + ns1:make_model "SBE-37" ; + ns1:ncei_name "CTD" ; + ns1:precision "" ; + ns1:serial_number "1859723" ; + CFTerms:long_name "Seabird 37 Microcat" . + ns1:lat a bald:Array ; ns1:_FillValue -9999.0 ; ns1:comment "These data are bogus!!!!!" ; @@ -181,6 +171,16 @@ ns1:lon a bald:Array ; CFTerms:standard_name ; CFTerms:units "degrees_east" . +ns1:platform1 a bald:Subject ; + ns1:call_sign "" ; + ns1:comment "Data is not actually collected from this platform, this is an example." ; + ns1:ices_code "" ; + ns1:imo_code "" ; + ns1:ioos_code "urn:ioos:station:NCEI:Mooring1" ; + ns1:ncei_code "FIXED PLATFORM, MOORINGS" ; + ns1:wmo_code "" ; + CFTerms:long_name "cordell bank monitoring station" . + ns1:time a bald:Array ; ns1:_FillValue -9999.0 ; ns1:comment "These data are bogus!!!!!" ; From 9328d2c731fc8fa91b4f76296afee48b33261bab Mon Sep 17 00:00:00 2001 From: markh Date: Thu, 15 Mar 2018 13:30:39 +0000 Subject: [PATCH 5/6] timeout management --- lib/bald/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 0a8857d..13cc80e 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -259,11 +259,11 @@ def __getitem__(self, item): # print('trying: {}'.format(item)) headers = {'Accept': 'application/rdf+xml'} - self.cache[item] = requests.get(item, headers=headers, timeout=17) + self.cache[item] = requests.get(item, headers=headers, timeout=11) except Exception: # print('retrying: {}'.format(item)) headers = {'Accept': 'text/html'} - self.cache[item] = requests.get(item, headers=headers, timeout=17) + self.cache[item] = requests.get(item, headers=headers, timeout=11) # print('in {} seconds'.format(time.time() - then)) return self.cache[item] From d23d51c53aeb028721ae122438e23931e44c05e2 Mon Sep 17 00:00:00 2001 From: marqh Date: Thu, 22 Mar 2018 07:56:09 +0000 Subject: [PATCH 6/6] test iteration --- .../tests/integration/TTL/ereefs_gbr4_ncld.ttl | 6 ++---- .../integration/test_multi_array_reference.py | 18 +++++++++++------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl b/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl index ad384ba..6338725 100644 --- a/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl +++ b/lib/bald/tests/integration/TTL/ereefs_gbr4_ncld.ttl @@ -75,8 +75,7 @@ ns1:salt a bald:Array ; ns1:substanceOrTaxon_id ; ns1:unit_id ; ns1:units "PSU" ; - ns1:valid_range [ rdf:first 0.0 ; - rdf:rest ( 40.0 ) ] ; + ns1:valid_range ( 0.0 40.0 ) ; bald:references ns1:salt_time_ref ; bald:shape "(5, 47, 180, 600)" . @@ -92,8 +91,7 @@ ns1:temp a bald:Array ; ns1:substanceOrTaxon_id ; ns1:unit_id ; ns1:units "degrees C" ; - ns1:valid_range [ rdf:first 0.0 ; - rdf:rest ( 40.0 ) ] ; + ns1:valid_range ( 0.0 40.0 ) ; bald:references ns1:temp_time_ref ; bald:shape "(5, 47, 180, 600)" . diff --git a/lib/bald/tests/integration/test_multi_array_reference.py b/lib/bald/tests/integration/test_multi_array_reference.py index 0aac0fb..dd284b2 100644 --- a/lib/bald/tests/integration/test_multi_array_reference.py +++ b/lib/bald/tests/integration/test_multi_array_reference.py @@ -29,11 +29,14 @@ def test_load(self): cdlfile = os.path.join(self.cdl_path, 'multi_array_reference.cdl') with self.temp_filename('.nc') as tfile: subprocess.check_call(['ncgen', '-o', tfile, cdlfile]) - inputs = bald.load_netcdf(tfile, cache=self.acache) - set_collection = inputs.bald__contains[6].bald__references - self.assertTrue(isinstance(set_collection, set)) - list_collection = inputs.bald__contains[7].bald__references - self.assertTrue(isinstance(list_collection, list)) + inputs = bald.load_netcdf(tfile, baseuri='file://CDL/multi_array_reference.nc', + cache=self.acache) + + for contained in inputs.bald__contains: + if contained.identity == 'file://CDL/multi_array_reference.nc/list_collection': + self.assertTrue(isinstance(contained.bald__references, list)) + elif contained.identity == 'file://CDL/multi_array_reference.nc/set_collection': + self.assertTrue(isinstance(contained.bald__references, set)) def test_turtle(self): with self.temp_filename('.nc') as tfile: @@ -43,8 +46,9 @@ def test_turtle(self): cdl_file_uri = 'file://CDL/{}'.format(cdlname) root_container = bald.load_netcdf(tfile, baseuri=cdl_file_uri, cache=self.acache) ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") - # with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'w') as sf: - # sf.write(ttl) + if os.environ.get('bald_update_results') is not None: + with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'w') as sf: + sf.write(ttl) with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl)