diff --git a/CHANGELOG.md b/CHANGELOG.md index ee977592..5fa05654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -205,6 +205,8 @@ - Use rdf-canonize to compare n-quads test results. - Maintain multiple graphs. - Sort `@type` when looking for scoped contexts. +- Emit only valid N-Quads from toRdf. + - **Note**: This could have a performance impact. ### Changed - Use JSON-LD WG tests. diff --git a/lib/toRdf.js b/lib/toRdf.js index e6a3281f..020dba24 100644 --- a/lib/toRdf.js +++ b/lib/toRdf.js @@ -34,6 +34,11 @@ const { isAbsolute: _isAbsoluteIri } = require('./url'); +const _HEX = '[0-9A-Fa-f]'; +const _UCHAR = '\\u' + _HEX + '{4}|\\U' + _HEX + '{8}'; +const IRIREF_RE = new RegExp('^([^\\x00-\\x20<>"{}|^`\\\\]|' + _UCHAR + ')*$'); +const LANG_RE = /^[a-zA-Z]+(-[a-zA-Z0-9]+)*$/; + const api = {}; module.exports = api; @@ -58,6 +63,11 @@ api.toRDF = (input, options) => { if(graphName === '@default') { graphTerm = {termType: 'DefaultGraph', value: ''}; } else if(_isAbsoluteIri(graphName)) { + // invalid graph IRI + if(!IRIREF_RE.test(graphName)) { + continue; + } + if(graphName.startsWith('_:')) { graphTerm = {termType: 'BlankNode'}; } else { @@ -110,6 +120,11 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) { continue; } + // invalid subject IRI + if(!IRIREF_RE.test(id)) { + continue; + } + // RDF predicate const predicate = { termType: property.startsWith('_:') ? 'BlankNode' : 'NamedNode', @@ -121,6 +136,11 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) { continue; } + // invalid predicate IRI + if(!IRIREF_RE.test(property)) { + continue; + } + // skip blank node predicates unless producing generalized RDF if(predicate.termType === 'BlankNode' && !options.produceGeneralizedRdf) { @@ -226,6 +246,11 @@ function _objectToRDF(item, issuer, dataset, graphTerm) { let value = item['@value']; const datatype = item['@type'] || null; + // invalid datatype IRI + if(datatype && !IRIREF_RE.test(datatype)) { + return null; + } + // convert to XSD/JSON datatypes as appropriate if(datatype === '@json') { object.value = jsonCanonicalize(value); @@ -244,6 +269,9 @@ function _objectToRDF(item, issuer, dataset, graphTerm) { object.value = value.toFixed(0); object.datatype.value = datatype || XSD_INTEGER; } else if('@language' in item) { + if(!LANG_RE.test(item['@language'])) { + return null; + } object.value = value; object.datatype.value = datatype || RDF_LANGSTRING; object.language = item['@language']; @@ -258,6 +286,12 @@ function _objectToRDF(item, issuer, dataset, graphTerm) { } else { // convert string/node object to RDF const id = types.isObject(item) ? item['@id'] : item; + + // invalid object IRI + if(!IRIREF_RE.test(id)) { + return null; + } + object.termType = id.startsWith('_:') ? 'BlankNode' : 'NamedNode'; object.value = id; }