Skip to content

Commit ad8a111

Browse files
committed
Merge pull request #28 from common-workflow-language/scoped_ref
Scoped ref
2 parents da10eec + 5aadf27 commit ad8a111

File tree

6 files changed

+190
-66
lines changed

6 files changed

+190
-66
lines changed

schema_salad/main.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,16 @@ def main(argsl=None): # type: (List[str]) -> int
9898
if not urlparse.urlparse(schema_uri)[0]:
9999
schema_uri = "file://" + os.path.abspath(schema_uri)
100100
schema_raw_doc = metaschema_loader.fetch(schema_uri)
101-
schema_doc, schema_metadata = metaschema_loader.resolve_all(
102-
schema_raw_doc, schema_uri)
101+
102+
try:
103+
schema_doc, schema_metadata = metaschema_loader.resolve_all(
104+
schema_raw_doc, schema_uri)
105+
except (validate.ValidationException) as e:
106+
_logger.error("Schema `%s` failed link checking:\n%s",
107+
args.schema, e, exc_info=(e if args.debug else False))
108+
_logger.debug("Index is %s", metaschema_loader.idx.keys())
109+
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
110+
return 1
103111

104112
# Optionally print the schema after ref resolution
105113
if not args.document and args.print_pre:
@@ -110,16 +118,6 @@ def main(argsl=None): # type: (List[str]) -> int
110118
print(json.dumps(metaschema_loader.idx.keys(), indent=4))
111119
return 0
112120

113-
# Validate links in the schema document
114-
try:
115-
metaschema_loader.validate_links(schema_doc)
116-
except (validate.ValidationException) as e:
117-
_logger.error("Schema `%s` failed link checking:\n%s",
118-
args.schema, e, exc_info=(e if args.debug else False))
119-
_logger.debug("Index is %s", metaschema_loader.idx.keys())
120-
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
121-
return 1
122-
123121
# Validate the schema document against the metaschema
124122
try:
125123
schema.validate_doc(metaschema_names, schema_doc,
@@ -196,16 +194,6 @@ def main(argsl=None): # type: (List[str]) -> int
196194
print(json.dumps(document_loader.idx.keys(), indent=4))
197195
return 0
198196

199-
# Validate links in the target document
200-
try:
201-
document_loader.validate_links(document)
202-
except (validate.ValidationException) as e:
203-
_logger.error("Document `%s` failed link checking:\n%s",
204-
args.document, e, exc_info=(e if args.debug else False))
205-
_logger.debug("Index is %s", json.dumps(
206-
document_loader.idx.keys(), indent=4))
207-
return 1
208-
209197
# Validate the target document against the schema
210198
try:
211199
schema.validate_doc(avsc_names, document,

schema_salad/metaschema/metaschema.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,23 @@ $graph:
139139
the item is transformed to a JSON object with the key assigned to the
140140
field specified by `mapSubject` and the value assigned to the field
141141
specified by `mapPredicate`.
142+
- name: refScope
143+
type:
144+
- "null"
145+
- int
146+
doc: |
147+
If the field contains a relative reference, it must be resolved by
148+
searching for valid document references in each successive parent scope
149+
in the document fragment. For example, a reference of `foo` in the
150+
context `#foo/bar/baz` will first check for the existence of
151+
`#foo/bar/baz/foo`, followed by `#foo/bar/foo`, then `#foo/foo` and
152+
then finally `#foo`. The first valid URI in the search order shall be
153+
used as the fully resolved value of the identifier. The value of the
154+
refScope field is the number of levels to strip from the containing
155+
identifier scope before starting the search, so if `refScope: 2` then
156+
"baz" and "bar" must be stripped to get the base `#foo` and search
157+
`#foo/foo` and then `#foo`. The last scope searched must be the top
158+
level scope before determining if the identifier cannot be resolved.
142159
143160
- name: SpecializeDef
144161
type: record

schema_salad/ref_resolver.py

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from rdflib.namespace import RDF, RDFS, OWL
2020
from rdflib.plugins.parsers.notation3 import BadSyntax
2121
import xml.sax
22-
from typing import cast, Union, Tuple, Dict, Any, Callable, Iterable
22+
from typing import Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple, TypeVar, Union
2323

2424
_logger = logging.getLogger("salad")
2525

@@ -94,6 +94,7 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
9494
self.cache = {}
9595

9696
self.url_fields = None # type: Set[str]
97+
self.scoped_ref_fields = None # type: Dict[unicode, int]
9798
self.vocab_fields = None # type: Set[str]
9899
self.identifiers = None # type: Set[str]
99100
self.identity_links = None # type: Set[str]
@@ -186,6 +187,7 @@ def add_context(self, newcontext, baseuri=""):
186187
"Refreshing context that already has stuff in it")
187188

188189
self.url_fields = set()
190+
self.scoped_ref_fields = {}
189191
self.vocab_fields = set()
190192
self.identifiers = set()
191193
self.identity_links = set()
@@ -206,6 +208,8 @@ def add_context(self, newcontext, baseuri=""):
206208
self.identity_links.add(key)
207209
elif isinstance(value, dict) and value.get("@type") == "@id":
208210
self.url_fields.add(key)
211+
if "refScope" in value:
212+
self.scoped_ref_fields[key] = value["refScope"]
209213
if value.get("identity", False):
210214
self.identity_links.add(key)
211215
elif isinstance(value, dict) and value.get("@type") == "@vocab":
@@ -235,8 +239,8 @@ def add_context(self, newcontext, baseuri=""):
235239
_logger.debug("vocab_fields is %s", self.vocab_fields)
236240
_logger.debug("vocab is %s", self.vocab)
237241

238-
def resolve_ref(self, ref, base_url=None):
239-
# type: (Union[Dict[str, Any], str, unicode], Union[str, unicode]) -> Tuple[Union[Dict[str, Any], str, unicode], Dict[str, Any]]
242+
def resolve_ref(self, ref, base_url=None, checklinks=True):
243+
# type: (Union[Dict[str, Any], str, unicode], Union[str, unicode], bool) -> Tuple[Union[Dict[str, Any], str, unicode], Dict[str, Any]]
240244
base_url = base_url or 'file://%s/' % os.path.abspath('.')
241245

242246
obj = None # type: Dict[str, Any]
@@ -297,7 +301,7 @@ def resolve_ref(self, ref, base_url=None):
297301
doc = self.fetch(doc_url)
298302

299303
# Recursively expand urls and resolve directives
300-
obj, metadata = self.resolve_all(doc if doc else obj, doc_url)
304+
obj, metadata = self.resolve_all(doc if doc else obj, doc_url, checklinks=checklinks)
301305

302306
# Requested reference should be in the index now, otherwise it's a bad
303307
# reference
@@ -318,8 +322,8 @@ def resolve_ref(self, ref, base_url=None):
318322
except TypeError:
319323
return obj, metadata
320324

321-
def resolve_all(self, document, base_url, file_base=None):
322-
# type: (Any, Union[str, unicode], Union[str, unicode]) -> Tuple[Any, Dict[str, Any]]
325+
def resolve_all(self, document, base_url, file_base=None, checklinks=True):
326+
# type: (Any, Union[str, unicode], Union[str, unicode], bool) -> Tuple[Any, Dict[str, Any]]
323327
loader = self
324328
metadata = {} # type: Dict[str, Any]
325329
if file_base is None:
@@ -328,7 +332,7 @@ def resolve_all(self, document, base_url, file_base=None):
328332
if isinstance(document, dict):
329333
# Handle $import and $include
330334
if ('$import' in document or '$include' in document):
331-
return self.resolve_ref(document, file_base)
335+
return self.resolve_ref(document, base_url=file_base, checklinks=checklinks)
332336
elif isinstance(document, list):
333337
pass
334338
else:
@@ -364,21 +368,22 @@ def resolve_all(self, document, base_url, file_base=None):
364368
if "$graph" in document:
365369
metadata = _copy_dict_without_key(document, "$graph")
366370
document = document["$graph"]
367-
metadata, _ = loader.resolve_all(metadata, base_url, file_base)
371+
metadata, _ = loader.resolve_all(metadata, base_url, file_base=file_base, checklinks=False)
368372

369373
if isinstance(document, dict):
370374
for idmapField in loader.idmap:
371375
if (idmapField in document and isinstance(document[idmapField], dict) and
372376
"$import" not in document[idmapField] and
373377
"$include" not in document[idmapField]):
374378
ls = []
375-
for k, v in document[idmapField].items():
379+
for k in sorted(document[idmapField].keys()):
380+
v = document[idmapField][k]
376381
if not isinstance(v, dict):
377382
if idmapField in loader.mapPredicate:
378383
v = {loader.mapPredicate[idmapField]: v}
379384
else:
380385
raise validate.ValidationException(
381-
"mapSubject '%s' value '%s' is not a dict and does not have a mapPredicate", k, v)
386+
"mapSubject '%s' value '%s' is not a dict and does not have a mapPredicate" % (k, v))
382387
v[loader.idmap[idmapField]] = k
383388
ls.append(v)
384389
document[idmapField] = ls
@@ -412,6 +417,8 @@ def resolve_all(self, document, base_url, file_base=None):
412417
del document[d]
413418

414419
for d in loader.url_fields:
420+
if d in self.scoped_ref_fields:
421+
continue
415422
if d in document:
416423
if isinstance(document[d], basestring):
417424
document[d] = loader.expand_url(
@@ -427,7 +434,7 @@ def resolve_all(self, document, base_url, file_base=None):
427434
try:
428435
for key, val in document.items():
429436
document[key], _ = loader.resolve_all(
430-
val, base_url, file_base)
437+
val, base_url, file_base=file_base, checklinks=False)
431438
except validate.ValidationException as v:
432439
_logger.debug("loader is %s", id(loader))
433440
raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
@@ -439,7 +446,7 @@ def resolve_all(self, document, base_url, file_base=None):
439446
while i < len(document):
440447
val = document[i]
441448
if isinstance(val, dict) and "$import" in val:
442-
l, _ = loader.resolve_ref(val, file_base)
449+
l, _ = loader.resolve_ref(val, base_url=file_base, checklinks=False)
443450
if isinstance(l, list):
444451
del document[i]
445452
for item in aslist(l):
@@ -450,7 +457,7 @@ def resolve_all(self, document, base_url, file_base=None):
450457
i += 1
451458
else:
452459
document[i], _ = loader.resolve_all(
453-
val, base_url, file_base)
460+
val, base_url, file_base=file_base, checklinks=False)
454461
i += 1
455462
except validate.ValidationException as v:
456463
raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
@@ -463,6 +470,9 @@ def resolve_all(self, document, base_url, file_base=None):
463470
metadata[identifer], base_url, scoped=True)
464471
loader.idx[metadata[identifer]] = document
465472

473+
if checklinks:
474+
self.validate_links(document, "")
475+
466476
return document, metadata
467477

468478
def fetch_text(self, url):
@@ -522,49 +532,72 @@ def check_file(self, fn): # type: (Union[str, unicode]) -> bool
522532
else:
523533
return False
524534

525-
def validate_link(self, field, link):
526-
# type: (str, Union[str, unicode, List[str], Dict[str, Any]]) -> bool
535+
FieldType = TypeVar('FieldType', unicode, List[str], Dict[str, Any])
536+
537+
def validate_link(self, field, link, docid):
538+
# type: (AnyStr, FieldType, AnyStr) -> FieldType
527539
if field in self.nolinkcheck:
528-
return True
540+
return link
529541
if isinstance(link, (str, unicode)):
530542
if field in self.vocab_fields:
531543
if link not in self.vocab and link not in self.idx and link not in self.rvocab:
532544
if not self.check_file(link):
533545
raise validate.ValidationException(
534546
"Field `%s` contains undefined reference to `%s`" % (field, link))
535547
elif link not in self.idx and link not in self.rvocab:
536-
if not self.check_file(link):
548+
if field in self.scoped_ref_fields:
549+
split = urlparse.urlsplit(docid)
550+
sp = split.fragment.split("/")
551+
n = self.scoped_ref_fields[field]
552+
while n > 0 and len(sp) > 0:
553+
sp.pop()
554+
n -= 1
555+
while True:
556+
sp.append(str(link))
557+
url = urlparse.urlunsplit(
558+
(split.scheme, split.netloc, split.path, split.query, "/".join(sp)))
559+
if url in self.idx:
560+
return url
561+
sp.pop()
562+
if len(sp) == 0:
563+
break
564+
sp.pop()
565+
raise validate.ValidationException(
566+
"Field `%s` contains undefined reference to `%s`" % (field, link))
567+
elif not self.check_file(link):
537568
raise validate.ValidationException(
538569
"Field `%s` contains undefined reference to `%s`" % (field, link))
539570
elif isinstance(link, list):
540571
errors = []
541-
for i in link:
572+
for n, i in enumerate(link):
542573
try:
543-
self.validate_link(field, i)
574+
link[n] = self.validate_link(field, i, docid)
544575
except validate.ValidationException as v:
545576
errors.append(v)
546577
if errors:
547578
raise validate.ValidationException(
548579
"\n".join([str(e) for e in errors]))
549580
elif isinstance(link, dict):
550-
self.validate_links(link)
581+
self.validate_links(link, docid)
551582
else:
552583
raise validate.ValidationException("Link must be a str, unicode, "
553584
"list, or a dict.")
554-
return True
585+
return link
555586

556-
def getid(self, d): # type: (Any) -> Union[basestring, None]
587+
def getid(self, d): # type: (Any) -> Union[str, unicode]
557588
if isinstance(d, dict):
558589
for i in self.identifiers:
559590
if i in d:
560-
if isinstance(d[i], basestring):
591+
if isinstance(d[i], (str, unicode)):
561592
return d[i]
562593
return None
563594

564-
def validate_links(self, document): # type: (Any) -> None
595+
DocumentType = TypeVar('DocumentType')
596+
597+
def validate_links(self, document, base_url): # type: (DocumentType, Union[str, unicode]) -> DocumentType
565598
docid = self.getid(document)
566-
if docid is None:
567-
docid = ""
599+
if not docid:
600+
docid = base_url
568601

569602
errors = []
570603
iterator = None # type: Any
@@ -573,26 +606,26 @@ def validate_links(self, document): # type: (Any) -> None
573606
elif isinstance(document, dict):
574607
try:
575608
for d in self.url_fields:
576-
if d not in self.identity_links and d in document:
577-
self.validate_link(d, document[d])
609+
if d in document and d not in self.identity_links:
610+
document[d] = self.validate_link(d, document[d], docid)
578611
except validate.ValidationException as v:
579612
errors.append(v)
580613
if hasattr(document, "iteritems"):
581614
iterator = document.iteritems()
582615
else:
583616
iterator = document.items()
584617
else:
585-
return
618+
return document
586619

587620
for key, val in iterator:
588621
try:
589-
self.validate_links(val)
622+
document[key] = self.validate_links(val, docid) # type: ignore
590623
except validate.ValidationException as v:
591624
if key not in self.nolinkcheck:
592-
docid = self.getid(val)
593-
if docid:
625+
docid2 = self.getid(val)
626+
if docid2:
594627
errors.append(validate.ValidationException(
595-
"While checking object `%s`\n%s" % (docid, validate.indent(str(v)))))
628+
"While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
596629
else:
597630
if isinstance(key, basestring):
598631
errors.append(validate.ValidationException(
@@ -607,7 +640,7 @@ def validate_links(self, document): # type: (Any) -> None
607640
"\n".join([str(e) for e in errors]))
608641
else:
609642
raise errors[0]
610-
return
643+
return document
611644

612645

613646
def _copy_dict_without_key(from_dict, filtered_key):

schema_salad/schema.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ def load_and_validate(document_loader, avsc_names, document, strict):
190190
else:
191191
data, metadata = document_loader.resolve_ref(document)
192192

193-
document_loader.validate_links(data)
194193
validate_doc(avsc_names, data, document_loader, strict)
195194
return data, metadata
196195

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
extras_require = {} # TODO: to be removed when the above is added
4242

4343
setup(name='schema-salad',
44-
version='1.11',
44+
version='1.12',
4545
description='Schema Annotations for Linked Avro Data (SALAD)',
4646
long_description=open(README).read(),
4747
author='Common workflow language working group',

0 commit comments

Comments
 (0)