Skip to content

Implement just-in-time context resolution. #342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions lib/ActiveContextCache.js

This file was deleted.

243 changes: 243 additions & 0 deletions lib/ContextResolver.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
/*
* Copyright (c) 2019 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';

const {
isArray: _isArray,
isObject: _isObject,
isString: _isString,
} = require('./types');
const {prependBase} = require('./url');
const JsonLdError = require('./JsonLdError');
const ResolvedContext = require('./ResolvedContext');

const MAX_CONTEXT_URLS = 10;

module.exports = class ContextResolver {
/**
* Creates a ContextResolver.
*
* @param sharedCache a shared LRU cache with `get` and `set` APIs.
*/
constructor({sharedCache}) {
this.perOpCache = new Map();
this.sharedCache = sharedCache;
}

/**
* Resolves a context value into a flat array of resolved contexts.
*
* If `context` is an object with a truthy `@context` member, that member is
* used instead. A non-array value is wrapped in an array, then each entry is
* handled by type:
* - string: looked up via `_get`; on a miss it is resolved through
*   `_resolveRemoteContext`. An array result is spread into the output.
* - `null`: a new `ResolvedContext` with a `null` document, never cached.
* - object: looked up (or created and cached under the `static` tag) using
*   its `JSON.stringify` output as the cache key.
* - anything else: `_throwInvalidLocalContext` is called.
*
* String entries are awaited one at a time inside the loop.
*
* NOTE(review): the cache lookup for a string entry uses the string as
* given, while `_resolveRemoteContext` caches under the URL returned by
* `prependBase` -- confirm whether relative URLs are expected to hit here.
*
* @param context the context value to resolve.
* @param documentLoader passed through to `_resolveRemoteContext`.
* @param base passed through to `_resolveRemoteContext`.
* @param cycles passed through to `_resolveRemoteContext`; defaults to a
*   new empty `Set`.
*
* @return a promise for the array of resolved contexts.
*/
async resolve({context, documentLoader, base, cycles = new Set()}) {
// process `@context`
if(context && _isObject(context) && context['@context']) {
context = context['@context'];
}

// context is one or more contexts
if(!_isArray(context)) {
context = [context];
}

// resolve each context in the array
const allResolved = [];
for(const ctx of context) {
if(_isString(ctx)) {
// see if `ctx` has been resolved before...
let resolved = this._get(ctx);
if(!resolved) {
// not resolved yet, resolve
resolved = await this._resolveRemoteContext(
{url: ctx, documentLoader, base, cycles});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have the affect of serializing all remote context fetches?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_resolveRemoteContext will fetch all remote contexts (recursively) and return an array of resolved contexts. Note that this does not include any scoped contexts, i.e., it doesn't deeply inspect the resolved contexts for those, but it will resolve any relative URLs encountered in scoped contexts to ensure the base URL used is proper. Any scoped contexts will be resolved later, JIT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If by this you meant "the await" -- then, yes, an array of context URLs will be loaded serially now. A future PR can add more complexity here to parallelize that.

}

// add to output and continue
if(_isArray(resolved)) {
allResolved.push(...resolved);
} else {
allResolved.push(resolved);
}
continue;
}
if(ctx === null) {
// handle `null` context, nothing to cache
allResolved.push(new ResolvedContext({document: null}));
continue;
}
if(!_isObject(ctx)) {
// note: the whole `context` array is passed, not just the bad entry
_throwInvalidLocalContext(context);
}
// context is an object, get/create `ResolvedContext` for it
const key = JSON.stringify(ctx);
let resolved = this._get(key);
if(!resolved) {
// create a new static `ResolvedContext` and cache it
resolved = new ResolvedContext({document: ctx});
this._cacheResolvedContext({key, resolved, tag: 'static'});
}
allResolved.push(resolved);
}

return allResolved;
}

_get(key) {
// get key from per operation cache; no `tag` is used with this cache so
// any retrieved context will always be the same during a single operation
let resolved = this.perOpCache.get(key);
if(!resolved) {
// see if the shared cache has a `static` entry for this URL
const tagMap = this.sharedCache.get(key);
if(tagMap) {
resolved = tagMap.get('static');
if(resolved) {
this.perOpCache.set(key, resolved);
}
}
}
return resolved;
}

_cacheResolvedContext({key, resolved, tag}) {
this.perOpCache.set(key, resolved);
if(tag !== undefined) {
let tagMap = this.sharedCache.get(key);
if(!tagMap) {
tagMap = new Map();
this.sharedCache.set(key, tagMap);
}
tagMap.set(tag, resolved);
}
return resolved;
}

/**
* Fetches the context at `url` and resolves it.
*
* The URL is first passed through `prependBase` with `base`, then loaded
* with `_fetchContext`. The fetched context has its `@context` URLs
* rewritten by `_resolveContextUrls` using the remote document's
* `documentUrl` (falling back to the URL that was fetched) as the new base.
* The result of calling `resolve` on that context is cached under the
* fetched URL with the remote document's `tag`, then returned.
*
* @param url the context URL to fetch.
* @param documentLoader passed through to `_fetchContext` and `resolve`.
* @param base the base to apply to `url` before fetching.
* @param cycles passed through to `_fetchContext` and `resolve`.
*
* @return a promise for the value returned by `resolve`.
*/
async _resolveRemoteContext({url, documentLoader, base, cycles}) {
// resolve relative URL and fetch context
url = prependBase(base, url);
const {context, remoteDoc} = await this._fetchContext(
{url, documentLoader, cycles});

// update base according to remote document and resolve any relative URLs
base = remoteDoc.documentUrl || url;
_resolveContextUrls({context, base});

// resolve, cache, and return context
const resolved = await this.resolve(
{context, documentLoader, base, cycles});
// the shared cache is only written when `remoteDoc.tag` is not `undefined`
this._cacheResolvedContext({key: url, resolved, tag: remoteDoc.tag});
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need some docs on the tag feature. Is 'static' special?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

static is a special value, yes. We can document in a separate PR.

return resolved;
}

/**
* Loads the document at `url` and normalizes it to the form
* `{'@context': ...}`.
*
* `url` is added to `cycles` before loading. A string document is parsed
* as JSON. If the remote document has a `contextUrl`, it is appended to the
* `@context` value (which is turned into an array if it is not one).
*
* @param url the URL to load.
* @param documentLoader called with `url`; its awaited result is used as
*   the remote document.
* @param cycles the set of URLs already fetched; mutated by this call.
*
* @return a promise for `{context, remoteDoc}`.
*
* @throws JsonLdError `jsonld.ContextUrlError` when `cycles` already holds
*   more than `MAX_CONTEXT_URLS` entries or already contains `url`.
* @throws JsonLdError `jsonld.InvalidUrl` when loading or JSON parsing
*   throws (the original error is attached as `cause`), or when the loaded
*   value is not an object.
*/
async _fetchContext({url, documentLoader, cycles}) {
// check for max context URLs fetched during a resolve operation
// (checked before `url` is added to `cycles` below)
if(cycles.size > MAX_CONTEXT_URLS) {
throw new JsonLdError(
'Maximum number of @context URLs exceeded.',
'jsonld.ContextUrlError',
{code: 'loading remote context failed', max: MAX_CONTEXT_URLS});
}

// check for context URL cycle
if(cycles.has(url)) {
throw new JsonLdError(
'Cyclical @context URLs detected.',
'jsonld.ContextUrlError',
{code: 'recursive context inclusion', url});
}

// track cycles
cycles.add(url);

let context;
let remoteDoc;

try {
remoteDoc = await documentLoader(url);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we're going to need to pass in API arguments to the documentLoader for options such as extractAllScripts, but this can be handled in a different PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. I didn't want to do anything with that in this PR but it will need to be addressed in a subsequent PR.

context = remoteDoc.document || null;
// parse string context as JSON
if(_isString(context)) {
context = JSON.parse(context);
}
} catch(e) {
// both loader failures and JSON parse errors end up here
throw new JsonLdError(
'Dereferencing a URL did not result in a valid JSON-LD object. ' +
'Possible causes are an inaccessible URL perhaps due to ' +
'a same-origin policy (ensure the server uses CORS if you are ' +
'using client-side JavaScript), too many redirects, a ' +
'non-JSON response, or more than one HTTP Link Header was ' +
'provided for a remote context.',
'jsonld.InvalidUrl',
{code: 'loading remote context failed', url, cause: e});
}

// ensure ctx is an object
if(!_isObject(context)) {
throw new JsonLdError(
'Dereferencing a URL did not result in a JSON object. The ' +
'response was valid JSON, but it was not a JSON object.',
'jsonld.InvalidUrl', {code: 'invalid remote context', url});
}

// use empty context if no @context key is present
// (either way, every other top-level key of the document is dropped)
if(!('@context' in context)) {
context = {'@context': {}};
} else {
context = {'@context': context['@context']};
}

// append @context URL to context if given
if(remoteDoc.contextUrl) {
if(!_isArray(context['@context'])) {
context['@context'] = [context['@context']];
}
context['@context'].push(remoteDoc.contextUrl);
}

return {context, remoteDoc};
}
};

/**
 * Always throws a `jsonld.SyntaxError` reporting an invalid local context.
 *
 * @param ctx the offending value; attached to the error details as
 *   `context`.
 *
 * @throws JsonLdError with code `invalid local context`.
 */
function _throwInvalidLocalContext(ctx) {
  const details = {code: 'invalid local context', context: ctx};
  throw new JsonLdError(
    'Invalid JSON-LD syntax; @context must be an object.',
    'jsonld.SyntaxError',
    details);
}

/**
 * Resolve all relative `@context` URLs in the given context by inline
 * replacing them with absolute URLs.
 *
 * The `@context` member of `context` is inspected:
 * - a string is replaced with `prependBase(base, string)`;
 * - an array has its string entries replaced in place and its object
 *   entries walked recursively;
 * - an object has each of its term values walked recursively, so that
 *   `@context` members nested inside term definitions are resolved too;
 * - any other value is left untouched.
 *
 * @param context the value whose `@context` member should be resolved;
 *   `null` and `undefined` (e.g. a term definition set to `null`) are
 *   ignored.
 * @param base the base IRI to use to resolve relative IRIs.
 */
function _resolveContextUrls({context, base}) {
  // a term may be defined as `null`; there is nothing to resolve in that
  // case and reading `@context` from it would throw a TypeError
  if(context === null || context === undefined) {
    return;
  }

  const ctx = context['@context'];

  if(_isString(ctx)) {
    context['@context'] = prependBase(base, ctx);
    return;
  }

  if(_isArray(ctx)) {
    for(let i = 0; i < ctx.length; ++i) {
      const element = ctx[i];
      if(_isString(element)) {
        ctx[i] = prependBase(base, element);
        continue;
      }
      if(_isObject(element)) {
        // wrap the element so the recursive call sees it as an `@context`
        _resolveContextUrls({context: {'@context': element}, base});
      }
    }
    return;
  }

  if(!_isObject(ctx)) {
    // no @context URLs can be found in non-object
    return;
  }

  // ctx is an object, resolve any context URLs in terms
  for(const term in ctx) {
    _resolveContextUrls({context: ctx[term], base});
  }
}
30 changes: 30 additions & 0 deletions lib/ResolvedContext.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (c) 2019 Digital Bazaar, Inc. All rights reserved.
*/
'use strict';

const LRU = require('lru-cache');

const MAX_ACTIVE_CONTEXTS = 10;

module.exports = class ResolvedContext {
/**
* Creates a ResolvedContext.
*
* @param document the context document.
*/
constructor({document}) {
this.document = document;
// TODO: enable customization of processed context cache
// TODO: limit based on size of processed contexts vs. number of them
this.cache = new LRU({max: MAX_ACTIVE_CONTEXTS});
}

getProcessed(activeCtx) {
return this.cache.get(activeCtx);
}

setProcessed(activeCtx, processedCtx) {
this.cache.set(activeCtx, processedCtx);
}
};
Loading