88 changes: 53 additions & 35 deletions lib/bald/__init__.py
@@ -10,6 +10,7 @@
import numpy as np
import pyparsing
import rdflib
+import rdflib.collection
import requests
import six

@@ -258,11 +259,11 @@ def __getitem__(self, item):
# print('trying: {}'.format(item))

headers = {'Accept': 'application/rdf+xml'}
-            self.cache[item] = requests.get(item, headers=headers, timeout=7)
+            self.cache[item] = requests.get(item, headers=headers, timeout=11)
except Exception:
# print('retrying: {}'.format(item))
headers = {'Accept': 'text/html'}
-            self.cache[item] = requests.get(item, headers=headers, timeout=7)
+            self.cache[item] = requests.get(item, headers=headers, timeout=11)

# print('in {} seconds'.format(time.time() - then))
return self.cache[item]
@@ -524,10 +525,12 @@ def viewgraph(self):
def rdfnode(self, graph):
selfnode = rdflib.URIRef(self.identity)
for attr in self.attrs:
+            list_items = []
objs = self.attrs[attr]
if(isinstance(objs, np.ndarray)):
#try to convert np.ndarray to a list
objs = objs.tolist()

if not (isinstance(objs, set) or isinstance(objs, list)):
objs = set([objs])
for obj in objs:
@@ -546,16 +549,24 @@ def rdfnode(self, graph):
else:
rdfobj = rdflib.Literal(rdfobj)
rdfpred = rdflib.URIRef(rdfpred)
-                try:
-                    graph.add((selfnode, rdfpred, rdfobj))
-                except AssertionError:
-                    graph.add((selfnode, rdfpred, rdfobj))
+                if isinstance(objs, set):
+                    try:
+                        graph.add((selfnode, rdfpred, rdfobj))
+                    except AssertionError:
+                        pass
+                elif isinstance(objs, list):
+                    list_items.append(rdfobj)
if isinstance(obj, Subject):
obj_ref = rdflib.URIRef(obj.identity)
if (obj_ref, None, None) not in graph:
graph = obj.rdfnode(graph)
+            if list_items:
+                list_name = rdflib.BNode()
+                col = rdflib.collection.Collection(graph, list_name, list_items)
+                graph.add((selfnode, rdfpred, list_name))
return graph
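
The rewritten `rdfnode` gathers list-valued attribute members into `list_items` and emits them as a single RDF Collection, which is what preserves member order in the output graph. A minimal standalone sketch of that pattern, with invented subject/predicate/member URIs:

```python
import rdflib
import rdflib.collection

graph = rdflib.Graph()
subject = rdflib.URIRef('http://example.org/gfsmos_process_chain')
predicate = rdflib.URIRef('http://example.org/OM_Process')
members = [rdflib.URIRef('http://example.org/step1'),
           rdflib.URIRef('http://example.org/step2')]

# The list head is a blank node; Collection writes the rdf:first/rdf:rest
# chain into the graph, and a single triple links the subject to the head.
head = rdflib.BNode()
rdflib.collection.Collection(graph, head, members)
graph.add((subject, predicate, head))

print(graph.serialize(format='turtle'))
```

Turtle serializes the chain with the `( ... )` shorthand, which is exactly what the ProcessChain0300.ttl expectation further down asserts.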

@@ -763,7 +774,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
root_container = Container(baseuri, '', attrs, prefixes=prefixes,
aliases=aliases, alias_graph=aliasgraph)

-    root_container.attrs['bald__contains'] = []
+    root_container.attrs['bald__contains'] = set()
file_variables = {}
for name in fhandle.variables:
if name == prefix_var_name:
@@ -855,7 +866,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
else:
var = Subject(baseuri, name, sattrs, prefixes=prefixes,
aliases=aliases, alias_graph=aliasgraph)
-            root_container.attrs['bald__contains'].append(var)
+            root_container.attrs['bald__contains'].add(var)
file_variables[name] = var


@@ -887,10 +898,18 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
'filter(?rtype = owl:Class) '
'}')

+    qstr = ('prefix bald: <http://binary-array-ld.net/latest/> '
+            'prefix skos: <http://www.w3.org/2004/02/skos/core#> '
+            'prefix owl: <http://www.w3.org/2002/07/owl#> '
+            'select ?s '
+            'where { '
+            ' ?s rdfs:range ?type . '
+            'filter(?type in (rdfs:Literal, skos:Concept)) '
+            '}')

refs = reference_graph.query(qstr)

-    ref_prefs = [str(ref[0]) for ref in list(refs)]
+    non_ref_prefs = [str(ref[0]) for ref in list(refs)]

# cycle again and find references
for name in fhandle.variables:
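
Note the inversion here: the old query collected the predicates to treat as references (`ref_prefs`); the new one collects predicates whose declared `rdfs:range` is `rdfs:Literal` or `skos:Concept` (`non_ref_prefs`), and the loop below treats every other predicate as a candidate reference. A rough, self-contained illustration against a toy vocabulary graph (the triples are made up; rdflib binds the `rdfs` prefix by default, which is why the query can omit its declaration):

```python
import rdflib

reference_graph = rdflib.Graph()
reference_graph.parse(data='''
    @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
    @prefix ex:   <http://example.org/> .
    ex:long_name  rdfs:range rdfs:Literal .  # literal-valued: not a reference
    ex:references rdfs:range ex:Resource .   # anything else: candidate reference
''', format='turtle')

qstr = ('prefix skos: <http://www.w3.org/2004/02/skos/core#> '
        'select ?s '
        'where { '
        ' ?s rdfs:range ?type . '
        'filter(?type in (rdfs:Literal, skos:Concept)) '
        '}')

non_ref_prefs = [str(row[0]) for row in reference_graph.query(qstr)]
print(non_ref_prefs)  # ['http://example.org/long_name']
```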
@@ -906,30 +925,29 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):

# for sattr in sattrs:
for sattr in (sattr for sattr in sattrs if
-                          root_container.unpack_predicate(sattr) in ref_prefs):
-            # if sattr == 'coordinates':
-            #     import pdb; pdb.set_trace()
+                          root_container.unpack_predicate(sattr) not in non_ref_prefs):

if (isinstance(sattrs[sattr], six.string_types) and
file_variables.get(sattrs[sattr])):
# next: remove all use of set, everything is dict or orderedDict
var.attrs[sattr] = set((file_variables.get(sattrs[sattr]),))
elif isinstance(sattrs[sattr], six.string_types):
-                potrefs_list = sattrs[sattr].split(',')
-                potrefs_set = sattrs[sattr].split(' ')
-                if len(potrefs_list) > 1:
-                    refs = np.array([file_variables.get(pref) is not None
-                                     for pref in potrefs_list])
-                    if np.all(refs):
-                        var.attrs[sattr] = [file_variables.get(pref)
-                                            for pref in potrefs_list]
-                elif len(potrefs_set) > 1:
-                    refs = np.array([file_variables.get(pref) is not None
-                                     for pref in potrefs_set])
-                    if np.all(refs):
-                        var.attrs[sattr] = set([file_variables.get(pref)
-                                                for pref in potrefs_set])
+                if sattrs[sattr].startswith('(') and sattrs[sattr].endswith(')'):
+                    potrefs_list = sattrs[sattr].lstrip('( ').rstrip(' )').split(' ')
+                    if len(potrefs_list) > 1:
+                        refs = np.array([file_variables.get(pref) is not None
+                                         for pref in potrefs_list])
+                        if np.all(refs):
+                            var.attrs[sattr] = [file_variables.get(pref)
+                                                for pref in potrefs_list]
+                else:
+                    potrefs_set = sattrs[sattr].split(' ')
+                    if len(potrefs_set) > 1:
+                        refs = np.array([file_variables.get(pref) is not None
+                                         for pref in potrefs_set])
+                        if np.all(refs):
+                            var.attrs[sattr] = set([file_variables.get(pref)
+                                                    for pref in potrefs_set])

# coordinate variables are bald__references except for
# variables that already declare themselves as bald__Reference
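
The branch above encodes the new attribute syntax: a value wrapped in parentheses, e.g. "( data_variable1 data_variable2 )", resolves to an ordered list of variables (later serialized as an RDF Collection), while a bare space-separated value keeps the old unordered-set behaviour. A hand-rolled sketch of just that rule; `unpack_references` and `file_variables` are stand-ins for illustration, not the library API:

```python
# Hypothetical lookup table mapping attribute tokens to variable objects.
file_variables = {'data_variable1': 'var1', 'data_variable2': 'var2'}

def unpack_references(value):
    """Sketch of the parenthesised-list vs. space-separated-set rule."""
    if value.startswith('(') and value.endswith(')'):
        names = value.lstrip('( ').rstrip(' )').split(' ')
        if len(names) > 1 and all(file_variables.get(n) for n in names):
            return [file_variables[n] for n in names]     # ordered -> RDF list
    else:
        names = value.split(' ')
        if len(names) > 1 and all(file_variables.get(n) for n in names):
            return set(file_variables[n] for n in names)  # unordered -> set
    return value                                          # fall through unchanged

print(unpack_references('( data_variable1 data_variable2 )'))  # ['var1', 'var2']
print(unpack_references('data_variable1 data_variable2'))      # {'var1', 'var2'}
```

This is also why multi_array_reference.cdl below changes its list_collection attribute from a comma-separated string to the parenthesised form.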
@@ -961,7 +979,7 @@ def load_netcdf(afilepath, baseuri=None, alias_dict=None, cache=None):
prefixes=prefixes,
aliases=aliases,
alias_graph=aliasgraph)
-                root_container.attrs['bald__contains'].append(ref_node)
+                root_container.attrs['bald__contains'].add(ref_node)
file_variables[name] = ref_node
refset.add(ref_node)
var.attrs['bald__references'] = refset
@@ -1000,7 +1018,7 @@ def validate(root_container, sval=None, cache=None):

root_val = bv.ContainerValidation(subject=root_container, httpcache=cache)
sval.stored_exceptions += root_val.exceptions()
-    for subject in root_container.attrs.get('bald__contains', []):
+    for subject in root_container.attrs.get('bald__contains', set()):
if isinstance(subject, Array):
array_val = bv.ArrayValidation(subject, httpcache=cache)
sval.stored_exceptions += array_val.exceptions()
@@ -1060,7 +1078,7 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
root_container = Container(baseuri, identity, attrs, prefixes=prefixes,
aliases=aliases, alias_graph=aliasgraph)

-    root_container.attrs['bald__contains'] = []
+    root_container.attrs['bald__contains'] = set()

file_variables = {}
# iterate through the datasets and groups
@@ -1071,14 +1089,14 @@ def _hdf_group(fhandle, identity='root', baseuri=None, prefixes=None,
if not skip:
if isinstance(dataset, h5py._hl.group.Group):
new_cont, new_fvars = _hdf_group(dataset, name, baseuri, prefixes, aliases)
-            root_container.attrs['bald__contains'].append(new_cont)
+            root_container.attrs['bald__contains'].add(new_cont)
file_variables = careful_update(file_variables, new_fvars)
#if hasattr(dataset, 'shape'):
elif isinstance(dataset, h5py._hl.dataset.Dataset):
sattrs = dict(dataset.attrs)
sattrs['bald__shape'] = dataset.shape
dset = Array(baseuri, name, sattrs, prefixes, aliases, aliasgraph)
-            root_container.attrs['bald__contains'].append(dset)
+            root_container.attrs['bald__contains'].add(dset)
file_variables[dataset.name] = dset
return root_container, file_variables

8 changes: 0 additions & 8 deletions lib/bald/tests/integration/CDL/ereefs_gbr4_ncld.cdl
@@ -126,19 +126,11 @@ variables:
:metadata_link = "http://marlin.csiro.au/geonetwork/srv/eng/search?&uuid=72020224-f086-434a-bbe9-a222c8e5cf0d" ;
:rdf__type = "bald__Container" ;
:bald__isPrefixedBy = "prefix_list" ;
-		:bald__isAliasedBy = "alias_list" ;

group: prefix_list {
// group attributes:
:bald__ = "http://binary-array-ld.net/latest/" ;
:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ;
} // group bald__prefix_list

-  group: alias_list {
-    // group attributes:
-    :qudt = "http://qudt.org/1.1/schema/qudt";
-    :ed_gov_au_op = "http://environment.data.gov.au/def/op" ;
-  } // group bald__alias_list

}
2 changes: 1 addition & 1 deletion lib/bald/tests/integration/CDL/multi_array_reference.cdl
@@ -36,7 +36,7 @@ variables:
set_collection:bald__references = "data_variable1 data_variable2" ;

int list_collection ;
list_collection:bald__references = "data_variable1,data_variable2" ;
list_collection:bald__references = "( data_variable1 data_variable2 )" ;


// global attributes:
30 changes: 30 additions & 0 deletions lib/bald/tests/integration/TTL/ProcessChain0300.ttl
@@ -0,0 +1,30 @@
@prefix CFTerms: <http://def.scitools.org.uk/CFTerms/> .
@prefix DA: <https://codes.nws.noaa.gov/DataAssimilation> .
@prefix NWP: <https://codes.nws.noaa.gov/NumericalWeatherPrediction> .
@prefix StatPP: <https://codes.nws.noaa.gov/StatisticalPostProcessing> .
@prefix bald: <http://binary-array-ld.net/latest/> .
@prefix cf_sname: <http://vocab.nerc.ac.uk/standard_name/> .
@prefix ns1: <file://CDL/ProcessChain0300.cdl/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<file://CDL/ProcessChain0300.cdl> a bald:Container ;
ns1:process_chain "gfsmos_process_chain" ;
bald:contains ns1:gfsmos_process_chain,
ns1:step1,
ns1:step2 ;
bald:isPrefixedBy "prefix_list" .

ns1:gfsmos_process_chain a bald:Subject ;
ns1:OM_Process ( ns1:step1 ns1:step2 ) .

ns1:step1 a bald:Subject ;
ns1:LE_ProcessStep <https://codes.nws.noaa.gov/NumericalWeatherPrediction/Models/GFS13> ;
ns1:LE_Source <https://codes.nws.noaa.gov/DataAssimilation/Methods/GDAS13> .

ns1:step2 a bald:Subject ;
ns1:LE_ProcessStep <https://codes.nws.noaa.gov/StatisticalPostProcessing/Methods/GFSMOS05> ;
ns1:LE_Source <https://codes.nws.noaa.gov/NumericalWeatherPrediction/Models/GFS13> .
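
As a sanity check, the ordered ns1:OM_Process chain in this expected output can be read back with rdflib: the Turtle `( ... )` shorthand parses into an rdf:first/rdf:rest chain that Collection walks in order. Illustrative only; the file path assumes a repository checkout:

```python
import rdflib
import rdflib.collection

graph = rdflib.Graph()
graph.parse('lib/bald/tests/integration/TTL/ProcessChain0300.ttl', format='turtle')

ns1 = rdflib.Namespace('file://CDL/ProcessChain0300.cdl/')
head = graph.value(ns1['gfsmos_process_chain'], ns1['OM_Process'])
steps = list(rdflib.collection.Collection(graph, head))
print(steps)  # [ns1.step1, ns1.step2], preserving declaration order
```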
