Skip to content

Commit 9fcd67d

Browse files
author
Peter Amstutz
committed
Fold link validation into resolve_all() and resolve scoped identifiers.
1 parent: da10eec — commit: 9fcd67d

File tree

4 files changed: 102 additions and 51 deletions.

schema_salad/main.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -111,14 +111,14 @@ def main(argsl=None): # type: (List[str]) -> int
111111
return 0
112112

113113
# Validate links in the schema document
114-
try:
115-
metaschema_loader.validate_links(schema_doc)
116-
except (validate.ValidationException) as e:
117-
_logger.error("Schema `%s` failed link checking:\n%s",
118-
args.schema, e, exc_info=(e if args.debug else False))
119-
_logger.debug("Index is %s", metaschema_loader.idx.keys())
120-
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
121-
return 1
114+
# try:
115+
# metaschema_loader.validate_links(schema_doc)
116+
# except (validate.ValidationException) as e:
117+
# _logger.error("Schema `%s` failed link checking:\n%s",
118+
# args.schema, e, exc_info=(e if args.debug else False))
119+
# _logger.debug("Index is %s", metaschema_loader.idx.keys())
120+
# _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
121+
# return 1
122122

123123
# Validate the schema document against the metaschema
124124
try:
@@ -197,14 +197,14 @@ def main(argsl=None): # type: (List[str]) -> int
197197
return 0
198198

199199
# Validate links in the target document
200-
try:
201-
document_loader.validate_links(document)
202-
except (validate.ValidationException) as e:
203-
_logger.error("Document `%s` failed link checking:\n%s",
204-
args.document, e, exc_info=(e if args.debug else False))
205-
_logger.debug("Index is %s", json.dumps(
206-
document_loader.idx.keys(), indent=4))
207-
return 1
200+
# try:
201+
# document_loader.validate_links(document)
202+
# except (validate.ValidationException) as e:
203+
# _logger.error("Document `%s` failed link checking:\n%s",
204+
# args.document, e, exc_info=(e if args.debug else False))
205+
# _logger.debug("Index is %s", json.dumps(
206+
# document_loader.idx.keys(), indent=4))
207+
# return 1
208208

209209
# Validate the schema document against the metaschema
210210
try:

schema_salad/ref_resolver.py

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
9494
self.cache = {}
9595

9696
self.url_fields = None # type: Set[str]
97+
self.scoped_ref_fields = None # type: Set[str]
9798
self.vocab_fields = None # type: Set[str]
9899
self.identifiers = None # type: Set[str]
99100
self.identity_links = None # type: Set[str]
@@ -186,6 +187,7 @@ def add_context(self, newcontext, baseuri=""):
186187
"Refreshing context that already has stuff in it")
187188

188189
self.url_fields = set()
190+
self.scoped_ref_fields = set()
189191
self.vocab_fields = set()
190192
self.identifiers = set()
191193
self.identity_links = set()
@@ -206,6 +208,8 @@ def add_context(self, newcontext, baseuri=""):
206208
self.identity_links.add(key)
207209
elif isinstance(value, dict) and value.get("@type") == "@id":
208210
self.url_fields.add(key)
211+
if value.get("scopedRef", False):
212+
self.scoped_ref_fields.add(key)
209213
if value.get("identity", False):
210214
self.identity_links.add(key)
211215
elif isinstance(value, dict) and value.get("@type") == "@vocab":
@@ -235,7 +239,7 @@ def add_context(self, newcontext, baseuri=""):
235239
_logger.debug("vocab_fields is %s", self.vocab_fields)
236240
_logger.debug("vocab is %s", self.vocab)
237241

238-
def resolve_ref(self, ref, base_url=None):
242+
def resolve_ref(self, ref, base_url=None, toplevel=True):
239243
# type: (Union[Dict[str, Any], str, unicode], Union[str, unicode]) -> Tuple[Union[Dict[str, Any], str, unicode], Dict[str, Any]]
240244
base_url = base_url or 'file://%s/' % os.path.abspath('.')
241245

@@ -297,7 +301,7 @@ def resolve_ref(self, ref, base_url=None):
297301
doc = self.fetch(doc_url)
298302

299303
# Recursively expand urls and resolve directives
300-
obj, metadata = self.resolve_all(doc if doc else obj, doc_url)
304+
obj, metadata = self.resolve_all(doc if doc else obj, doc_url, toplevel=toplevel)
301305

302306
# Requested reference should be in the index now, otherwise it's a bad
303307
# reference
@@ -318,7 +322,7 @@ def resolve_ref(self, ref, base_url=None):
318322
except TypeError:
319323
return obj, metadata
320324

321-
def resolve_all(self, document, base_url, file_base=None):
325+
def resolve_all(self, document, base_url, file_base=None, toplevel=True):
322326
# type: (Any, Union[str, unicode], Union[str, unicode]) -> Tuple[Any, Dict[str, Any]]
323327
loader = self
324328
metadata = {} # type: Dict[str, Any]
@@ -328,7 +332,7 @@ def resolve_all(self, document, base_url, file_base=None):
328332
if isinstance(document, dict):
329333
# Handle $import and $include
330334
if ('$import' in document or '$include' in document):
331-
return self.resolve_ref(document, file_base)
335+
return self.resolve_ref(document, base_url=file_base, toplevel=toplevel)
332336
elif isinstance(document, list):
333337
pass
334338
else:
@@ -364,7 +368,7 @@ def resolve_all(self, document, base_url, file_base=None):
364368
if "$graph" in document:
365369
metadata = _copy_dict_without_key(document, "$graph")
366370
document = document["$graph"]
367-
metadata, _ = loader.resolve_all(metadata, base_url, file_base)
371+
metadata, _ = loader.resolve_all(metadata, base_url, file_base=file_base, toplevel=False)
368372

369373
if isinstance(document, dict):
370374
for idmapField in loader.idmap:
@@ -412,6 +416,8 @@ def resolve_all(self, document, base_url, file_base=None):
412416
del document[d]
413417

414418
for d in loader.url_fields:
419+
if d in self.scoped_ref_fields:
420+
continue
415421
if d in document:
416422
if isinstance(document[d], basestring):
417423
document[d] = loader.expand_url(
@@ -427,7 +433,7 @@ def resolve_all(self, document, base_url, file_base=None):
427433
try:
428434
for key, val in document.items():
429435
document[key], _ = loader.resolve_all(
430-
val, base_url, file_base)
436+
val, base_url, file_base=file_base, toplevel=False)
431437
except validate.ValidationException as v:
432438
_logger.debug("loader is %s", id(loader))
433439
raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
@@ -439,7 +445,7 @@ def resolve_all(self, document, base_url, file_base=None):
439445
while i < len(document):
440446
val = document[i]
441447
if isinstance(val, dict) and "$import" in val:
442-
l, _ = loader.resolve_ref(val, file_base)
448+
l, _ = loader.resolve_ref(val, base_url=file_base, toplevel=False)
443449
if isinstance(l, list):
444450
del document[i]
445451
for item in aslist(l):
@@ -450,7 +456,7 @@ def resolve_all(self, document, base_url, file_base=None):
450456
i += 1
451457
else:
452458
document[i], _ = loader.resolve_all(
453-
val, base_url, file_base)
459+
val, base_url, file_base=file_base, toplevel=False)
454460
i += 1
455461
except validate.ValidationException as v:
456462
raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
@@ -463,6 +469,9 @@ def resolve_all(self, document, base_url, file_base=None):
463469
metadata[identifer], base_url, scoped=True)
464470
loader.idx[metadata[identifer]] = document
465471

472+
if toplevel:
473+
self.validate_links(document, "")
474+
466475
return document, metadata
467476

468477
def fetch_text(self, url):
@@ -522,36 +531,51 @@ def check_file(self, fn): # type: (Union[str, unicode]) -> bool
522531
else:
523532
return False
524533

525-
def validate_link(self, field, link):
534+
def validate_link(self, field, link, docid):
526535
# type: (str, Union[str, unicode, List[str], Dict[str, Any]]) -> bool
527536
if field in self.nolinkcheck:
528-
return True
537+
return link
529538
if isinstance(link, (str, unicode)):
530539
if field in self.vocab_fields:
531540
if link not in self.vocab and link not in self.idx and link not in self.rvocab:
532541
if not self.check_file(link):
533542
raise validate.ValidationException(
534543
"Field `%s` contains undefined reference to `%s`" % (field, link))
535544
elif link not in self.idx and link not in self.rvocab:
536-
if not self.check_file(link):
545+
if field in self.scoped_ref_fields:
546+
split = urlparse.urlsplit(docid)
547+
sp = split.fragment.split("/")
548+
while len(sp) > 0:
549+
sp.pop()
550+
sp.append(link)
551+
url = urlparse.urlunsplit(
552+
(split.scheme, split.netloc, split.path, split.query, "/".join(sp)))
553+
if url in self.idx:
554+
print link, "is", url
555+
return url
556+
else:
557+
sp.pop()
558+
raise validate.ValidationException(
559+
"Field `%s` contains undefined reference to `%s`" % (field, link))
560+
elif not self.check_file(link):
537561
raise validate.ValidationException(
538562
"Field `%s` contains undefined reference to `%s`" % (field, link))
539563
elif isinstance(link, list):
540564
errors = []
541-
for i in link:
565+
for n, i in enumerate(link):
542566
try:
543-
self.validate_link(field, i)
567+
link[n] = self.validate_link(field, i, docid)
544568
except validate.ValidationException as v:
545569
errors.append(v)
546570
if errors:
547571
raise validate.ValidationException(
548572
"\n".join([str(e) for e in errors]))
549573
elif isinstance(link, dict):
550-
self.validate_links(link)
574+
self.validate_links(link, docid)
551575
else:
552576
raise validate.ValidationException("Link must be a str, unicode, "
553577
"list, or a dict.")
554-
return True
578+
return link
555579

556580
def getid(self, d): # type: (Any) -> Union[basestring, None]
557581
if isinstance(d, dict):
@@ -561,10 +585,10 @@ def getid(self, d): # type: (Any) -> Union[basestring, None]
561585
return d[i]
562586
return None
563587

564-
def validate_links(self, document): # type: (Any) -> None
588+
def validate_links(self, document, base_url): # type: (Any) -> None
565589
docid = self.getid(document)
566-
if docid is None:
567-
docid = ""
590+
if not docid:
591+
docid = base_url
568592

569593
errors = []
570594
iterator = None # type: Any
@@ -573,26 +597,26 @@ def validate_links(self, document): # type: (Any) -> None
573597
elif isinstance(document, dict):
574598
try:
575599
for d in self.url_fields:
576-
if d not in self.identity_links and d in document:
577-
self.validate_link(d, document[d])
600+
if d in document and d not in self.identity_links:
601+
document[d] = self.validate_link(d, document[d], docid)
578602
except validate.ValidationException as v:
579603
errors.append(v)
580604
if hasattr(document, "iteritems"):
581605
iterator = document.iteritems()
582606
else:
583607
iterator = document.items()
584608
else:
585-
return
609+
return document
586610

587611
for key, val in iterator:
588612
try:
589-
self.validate_links(val)
613+
document[key] = self.validate_links(val, docid)
590614
except validate.ValidationException as v:
591615
if key not in self.nolinkcheck:
592-
docid = self.getid(val)
593-
if docid:
616+
docid2 = self.getid(val)
617+
if docid2:
594618
errors.append(validate.ValidationException(
595-
"While checking object `%s`\n%s" % (docid, validate.indent(str(v)))))
619+
"While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
596620
else:
597621
if isinstance(key, basestring):
598622
errors.append(validate.ValidationException(
@@ -607,7 +631,7 @@ def validate_links(self, document): # type: (Any) -> None
607631
"\n".join([str(e) for e in errors]))
608632
else:
609633
raise errors[0]
610-
return
634+
return document
611635

612636

613637
def _copy_dict_without_key(from_dict, filtered_key):

schema_salad/schema.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ def load_and_validate(document_loader, avsc_names, document, strict):
190190
else:
191191
data, metadata = document_loader.resolve_ref(document)
192192

193-
document_loader.validate_links(data)
194193
validate_doc(avsc_names, data, document_loader, strict)
195194
return data, metadata
196195

tests/test_examples.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -97,16 +97,44 @@ def test_idmap(self):
9797
}
9898
}, "http://example2.com/")
9999

100-
self.assertEqual(ra["id"], "http://example2.com/#stuff")
100+
self.assertEqual("http://example2.com/#stuff", ra["id"])
101101
for item in ra["inputs"]:
102102
if item["a"] == 2:
103-
self.assertEquals(item["id"],
104-
'http://example2.com/#stuff/zing')
103+
self.assertEquals('http://example2.com/#stuff/zing', item["id"])
105104
else:
106-
self.assertEquals(item["id"],
107-
'http://example2.com/#stuff/zip')
108-
self.assertEquals(ra['outputs'], ['http://example2.com/#stuff/out'])
109-
self.assertEquals(ra['other'], {'n': 9})
105+
self.assertEquals('http://example2.com/#stuff/zip', item["id"])
106+
self.assertEquals(['http://example2.com/#stuff/out'], ra['outputs'])
107+
self.assertEquals({'n': 9}, ra['other'])
108+
109+
def test_scoped_ref(self):
110+
ldr = schema_salad.ref_resolver.Loader({})
111+
ldr.add_context({
112+
"ref": {
113+
"@type": "@id",
114+
"scopedRef": True,
115+
},
116+
"id": "@id"})
117+
118+
ra, _ = ldr.resolve_all({
119+
"id": "foo",
120+
"blurb": {
121+
"id": "bar",
122+
"blurb": {
123+
"id": "baz",
124+
"ref": ["foo", "bar", "baz"]
125+
}
126+
}
127+
}, "http://example2.com/")
128+
129+
self.assertEquals({'id': 'http://example2.com/#foo',
130+
'blurb': {
131+
'id': 'http://example2.com/#foo/bar',
132+
'blurb': {
133+
'ref': ['http://example2.com/#foo',
134+
'http://example2.com/#foo/bar',
135+
'http://example2.com/#foo/bar/baz'],
136+
'id': 'http://example2.com/#foo/bar/baz'}}},
137+
ra)
110138

111139
def test_examples(self):
112140
self.maxDiff = None
@@ -115,7 +143,7 @@ def test_examples(self):
115143
"schema_salad/metaschema/%s_schema.yml" % a)
116144
with open("schema_salad/metaschema/%s_src.yml" % a) as src_fp:
117145
src = ldr.resolve_all(
118-
yaml.load(src_fp, Loader=SafeLoader), "")[0]
146+
yaml.load(src_fp, Loader=SafeLoader), "", toplevel=False)[0]
119147
with open("schema_salad/metaschema/%s_proc.yml" % a) as src_proc:
120148
proc = yaml.load(src_proc, Loader=SafeLoader)
121149
self.assertEqual(proc, src)

0 commit comments

Comments
 (0)