-
Notifications
You must be signed in to change notification settings - Fork 203
Implement just-in-time context resolution. #342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
/* | ||
* Copyright (c) 2019 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
|
||
const { | ||
isArray: _isArray, | ||
isObject: _isObject, | ||
isString: _isString, | ||
} = require('./types'); | ||
const {prependBase} = require('./url'); | ||
const JsonLdError = require('./JsonLdError'); | ||
const ResolvedContext = require('./ResolvedContext'); | ||
|
||
// Limit on how many context URLs may be tracked in `cycles` during a single
// resolve operation; `_fetchContext` throws once the tracked count exceeds it.
const MAX_CONTEXT_URLS = 10; | ||
|
||
module.exports = class ContextResolver { | ||
/** | ||
* Creates a ContextResolver. | ||
* | ||
* @param sharedCache a shared LRU cache with `get` and `set` APIs. | ||
*/ | ||
constructor({sharedCache}) { | ||
this.perOpCache = new Map(); | ||
this.sharedCache = sharedCache; | ||
} | ||
|
||
async resolve({context, documentLoader, base, cycles = new Set()}) { | ||
// process `@context` | ||
if(context && _isObject(context) && context['@context']) { | ||
context = context['@context']; | ||
} | ||
|
||
// context is one or more contexts | ||
if(!_isArray(context)) { | ||
context = [context]; | ||
} | ||
|
||
// resolve each context in the array | ||
const allResolved = []; | ||
for(const ctx of context) { | ||
if(_isString(ctx)) { | ||
// see if `ctx` has been resolved before... | ||
let resolved = this._get(ctx); | ||
if(!resolved) { | ||
// not resolved yet, resolve | ||
resolved = await this._resolveRemoteContext( | ||
{url: ctx, documentLoader, base, cycles}); | ||
} | ||
|
||
// add to output and continue | ||
if(_isArray(resolved)) { | ||
allResolved.push(...resolved); | ||
} else { | ||
allResolved.push(resolved); | ||
} | ||
continue; | ||
} | ||
if(ctx === null) { | ||
// handle `null` context, nothing to cache | ||
allResolved.push(new ResolvedContext({document: null})); | ||
continue; | ||
} | ||
if(!_isObject(ctx)) { | ||
_throwInvalidLocalContext(context); | ||
} | ||
// context is an object, get/create `ResolvedContext` for it | ||
const key = JSON.stringify(ctx); | ||
let resolved = this._get(key); | ||
if(!resolved) { | ||
// create a new static `ResolvedContext` and cache it | ||
resolved = new ResolvedContext({document: ctx}); | ||
this._cacheResolvedContext({key, resolved, tag: 'static'}); | ||
} | ||
allResolved.push(resolved); | ||
} | ||
|
||
return allResolved; | ||
} | ||
|
||
_get(key) { | ||
// get key from per operation cache; no `tag` is used with this cache so | ||
// any retrieved context will always be the same during a single operation | ||
let resolved = this.perOpCache.get(key); | ||
if(!resolved) { | ||
// see if the shared cache has a `static` entry for this URL | ||
const tagMap = this.sharedCache.get(key); | ||
if(tagMap) { | ||
resolved = tagMap.get('static'); | ||
if(resolved) { | ||
this.perOpCache.set(key, resolved); | ||
} | ||
} | ||
} | ||
return resolved; | ||
} | ||
|
||
_cacheResolvedContext({key, resolved, tag}) { | ||
this.perOpCache.set(key, resolved); | ||
if(tag !== undefined) { | ||
let tagMap = this.sharedCache.get(key); | ||
if(!tagMap) { | ||
tagMap = new Map(); | ||
this.sharedCache.set(key, tagMap); | ||
} | ||
tagMap.set(tag, resolved); | ||
} | ||
return resolved; | ||
} | ||
|
||
async _resolveRemoteContext({url, documentLoader, base, cycles}) { | ||
// resolve relative URL and fetch context | ||
url = prependBase(base, url); | ||
const {context, remoteDoc} = await this._fetchContext( | ||
{url, documentLoader, cycles}); | ||
|
||
// update base according to remote document and resolve any relative URLs | ||
base = remoteDoc.documentUrl || url; | ||
_resolveContextUrls({context, base}); | ||
|
||
// resolve, cache, and return context | ||
const resolved = await this.resolve( | ||
{context, documentLoader, base, cycles}); | ||
this._cacheResolvedContext({key: url, resolved, tag: remoteDoc.tag}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need some docs on the tag feature. Is 'static' special? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return resolved; | ||
} | ||
|
||
async _fetchContext({url, documentLoader, cycles}) { | ||
// check for max context URLs fetched during a resolve operation | ||
if(cycles.size > MAX_CONTEXT_URLS) { | ||
throw new JsonLdError( | ||
'Maximum number of @context URLs exceeded.', | ||
'jsonld.ContextUrlError', | ||
{code: 'loading remote context failed', max: MAX_CONTEXT_URLS}); | ||
} | ||
|
||
// check for context URL cycle | ||
if(cycles.has(url)) { | ||
throw new JsonLdError( | ||
'Cyclical @context URLs detected.', | ||
'jsonld.ContextUrlError', | ||
{code: 'recursive context inclusion', url}); | ||
} | ||
|
||
// track cycles | ||
cycles.add(url); | ||
|
||
let context; | ||
let remoteDoc; | ||
|
||
try { | ||
remoteDoc = await documentLoader(url); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that we're going to need to pass in API arguments to the documentLoader for options such as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I didn't want to do anything with that in this PR but it will need to be addressed in a subsequent PR. |
||
context = remoteDoc.document || null; | ||
// parse string context as JSON | ||
if(_isString(context)) { | ||
context = JSON.parse(context); | ||
} | ||
} catch(e) { | ||
throw new JsonLdError( | ||
'Dereferencing a URL did not result in a valid JSON-LD object. ' + | ||
'Possible causes are an inaccessible URL perhaps due to ' + | ||
'a same-origin policy (ensure the server uses CORS if you are ' + | ||
'using client-side JavaScript), too many redirects, a ' + | ||
'non-JSON response, or more than one HTTP Link Header was ' + | ||
'provided for a remote context.', | ||
'jsonld.InvalidUrl', | ||
{code: 'loading remote context failed', url, cause: e}); | ||
} | ||
|
||
// ensure ctx is an object | ||
if(!_isObject(context)) { | ||
throw new JsonLdError( | ||
'Dereferencing a URL did not result in a JSON object. The ' + | ||
'response was valid JSON, but it was not a JSON object.', | ||
'jsonld.InvalidUrl', {code: 'invalid remote context', url}); | ||
} | ||
|
||
// use empty context if no @context key is present | ||
if(!('@context' in context)) { | ||
context = {'@context': {}}; | ||
} else { | ||
context = {'@context': context['@context']}; | ||
} | ||
|
||
// append @context URL to context if given | ||
if(remoteDoc.contextUrl) { | ||
if(!_isArray(context['@context'])) { | ||
context['@context'] = [context['@context']]; | ||
} | ||
context['@context'].push(remoteDoc.contextUrl); | ||
} | ||
|
||
return {context, remoteDoc}; | ||
} | ||
}; | ||
|
||
/**
 * Throws the syntax error reported for an unacceptable local `@context`.
 *
 * @param ctx the offending context value; attached to the error details.
 */
function _throwInvalidLocalContext(ctx) {
  const details = {code: 'invalid local context', context: ctx};
  const message = 'Invalid JSON-LD syntax; @context must be an object.';
  throw new JsonLdError(message, 'jsonld.SyntaxError', details);
}
|
||
/**
 * Resolve all relative `@context` URLs in the given context by inline
 * replacing them with absolute URLs.
 *
 * The function recurses into arrays and into every term value of an object
 * context so that `@context` URLs nested in term definitions are resolved as
 * well. The input is modified in place; nothing is returned.
 *
 * @param context the context; when called recursively this is a term value,
 *   which may be `null` or a non-object and is then ignored.
 * @param base the base IRI to use to resolve relative IRIs.
 */
function _resolveContextUrls({context, base}) {
  // term values may be `null` (or otherwise empty); without this guard the
  // property access below throws a TypeError on `null`/`undefined`
  if(!context) {
    return;
  }

  const ctx = context['@context'];

  if(_isString(ctx)) {
    context['@context'] = prependBase(base, ctx);
    return;
  }

  if(_isArray(ctx)) {
    for(let i = 0; i < ctx.length; ++i) {
      const element = ctx[i];
      if(_isString(element)) {
        ctx[i] = prependBase(base, element);
        continue;
      }
      if(_isObject(element)) {
        // wrap the element so it is processed as an object context
        _resolveContextUrls({context: {'@context': element}, base});
      }
    }
    return;
  }

  if(!_isObject(ctx)) {
    // no @context URLs can be found in non-object
    return;
  }

  // ctx is an object, resolve any context URLs in terms
  for(const term in ctx) {
    _resolveContextUrls({context: ctx[term], base});
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
* Copyright (c) 2019 Digital Bazaar, Inc. All rights reserved. | ||
*/ | ||
'use strict'; | ||
|
||
const LRU = require('lru-cache'); | ||
|
||
// Passed as the `max` option of the LRU cache created for each
// `ResolvedContext`.
const MAX_ACTIVE_CONTEXTS = 10; | ||
|
||
module.exports = class ResolvedContext { | ||
/** | ||
* Creates a ResolvedContext. | ||
* | ||
* @param document the context document. | ||
*/ | ||
constructor({document}) { | ||
this.document = document; | ||
// TODO: enable customization of processed context cache | ||
// TODO: limit based on size of processed contexts vs. number of them | ||
this.cache = new LRU({max: MAX_ACTIVE_CONTEXTS}); | ||
} | ||
|
||
getProcessed(activeCtx) { | ||
return this.cache.get(activeCtx); | ||
} | ||
|
||
setProcessed(activeCtx, processedCtx) { | ||
this.cache.set(activeCtx, processedCtx); | ||
} | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this have the effect of serializing all remote context fetches?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`_resolveRemoteContext` will fetch all remote contexts (recursively) and return an array of resolved contexts. Note that this does not include any scoped contexts, i.e., it doesn't deeply inspect the resolved contexts for those, but it will resolve any relative URLs encountered in scoped contexts to ensure the base URL used is proper. Any scoped contexts will be resolved later, JIT.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If by `this` you meant "the `await`" -- then, yes, an array of context URLs will be loaded serially now. A future PR can add more complexity here to parallelize that.