From e5327b83a742b73756f953b1bd5ca90bea0040de Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 27 Nov 2018 13:44:26 -0800 Subject: [PATCH 1/4] Text and tests for using HTML base for embedded JSON-LD. For w3c/json-ld-syntax#23. --- index.html | 8 ++++++++ tests/expand-manifest.jsonld | 32 ++++++++++++++++++++++++++++++++ tests/expand/h018-in.html | 13 +++++++++++++ tests/expand/h018-out.jsonld | 4 ++++ tests/expand/h019-in.html | 13 +++++++++++++ tests/expand/h019-out.jsonld | 4 ++++ tests/expand/h020-in.html | 14 ++++++++++++++ tests/expand/h020-out.jsonld | 4 ++++ tests/expand/h021-in.html | 14 ++++++++++++++ tests/expand/h021-out.jsonld | 4 ++++ 10 files changed, 110 insertions(+) create mode 100644 tests/expand/h018-in.html create mode 100644 tests/expand/h018-out.jsonld create mode 100644 tests/expand/h019-in.html create mode 100644 tests/expand/h019-out.jsonld create mode 100644 tests/expand/h020-in.html create mode 100644 tests/expand/h020-out.jsonld create mode 100644 tests/expand/h021-in.html create mode 100644 tests/expand/h021-out.jsonld diff --git a/index.html b/index.html index 8d0d8064..da15a726 100644 --- a/index.html +++ b/index.html @@ -4866,6 +4866,14 @@

The JsonLdProcessor Interface

a string representing the IRI of a remote document, extract the content of the JSON-LD script element(s) into original input:
    +
  1. Set base IRI to the Document Base URL + of original input, as defined in [[HTML52]], + using the existing base IRI as the document's URL. +
    + The use of the Document Base URL + from [[HTML52]] for setting the base IRI of the enclosed JSON-LD + is an experimental feature, which may be changed in a future version of this specification. +
  2. If the original passed input parameter contains a fragment identifier, set source to the textContent diff --git a/tests/expand-manifest.jsonld b/tests/expand-manifest.jsonld index dde9c18d..b88731b7 100644 --- a/tests/expand-manifest.jsonld +++ b/tests/expand-manifest.jsonld @@ -1433,6 +1433,38 @@ "input": "expand/h017-in.html", "expect": "invalid script element", "option": {"specVersion": "json-ld-1.1"} + }, { + "@id": "#th018", + "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], + "name": "Expands embedded JSON-LD script element relative to document base", + "purpose": "Tests embedded JSON-LD in HTML", + "input": "expand/h018-in.html", + "expect": "expand/h018-out.jsonld", + "option": {"specVersion": "json-ld-1.1"} + }, { + "@id": "#th019", + "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], + "name": "Expands embedded JSON-LD script element relative to base option", + "purpose": "Tests embedded JSON-LD in HTML", + "input": "expand/h019-in.html", + "expect": "expand/h019-out.jsonld", + "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} + }, { + "@id": "#th020", + "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], + "name": "Expands embedded JSON-LD script element relative to HTML base", + "purpose": "Tests embedded JSON-LD in HTML", + "input": "expand/h020-in.html", + "expect": "expand/h020-out.jsonld", + "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} + }, { + "@id": "#th021", + "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], + "name": "Expands embedded JSON-LD script element relative to relative HTML base", + "purpose": "Tests embedded JSON-LD in HTML", + "input": "expand/h021-in.html", + "expect": "expand/h021-out.jsonld", + "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} }, { "@id": "#tm001", "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], diff --git a/tests/expand/h018-in.html b/tests/expand/h018-in.html new file mode 100644 
index 00000000..8a644238 --- /dev/null +++ b/tests/expand/h018-in.html @@ -0,0 +1,13 @@ + + + + + \ No newline at end of file diff --git a/tests/expand/h018-out.jsonld b/tests/expand/h018-out.jsonld new file mode 100644 index 00000000..7bf2e224 --- /dev/null +++ b/tests/expand/h018-out.jsonld @@ -0,0 +1,4 @@ +[{ + "@id": "https://w3c.github.io/json-ld-api/tests/expand/h018-in.html", + "http://example.com/foo": [{"@value": "bar"}] +}] \ No newline at end of file diff --git a/tests/expand/h019-in.html b/tests/expand/h019-in.html new file mode 100644 index 00000000..8a644238 --- /dev/null +++ b/tests/expand/h019-in.html @@ -0,0 +1,13 @@ + + + + + \ No newline at end of file diff --git a/tests/expand/h019-out.jsonld b/tests/expand/h019-out.jsonld new file mode 100644 index 00000000..98a93903 --- /dev/null +++ b/tests/expand/h019-out.jsonld @@ -0,0 +1,4 @@ +[{ + "@id": "http://example.org/doc", + "http://example.com/foo": [{"@value": "bar"}] +}] \ No newline at end of file diff --git a/tests/expand/h020-in.html b/tests/expand/h020-in.html new file mode 100644 index 00000000..134b41b0 --- /dev/null +++ b/tests/expand/h020-in.html @@ -0,0 +1,14 @@ + + + + + + \ No newline at end of file diff --git a/tests/expand/h020-out.jsonld b/tests/expand/h020-out.jsonld new file mode 100644 index 00000000..1897d265 --- /dev/null +++ b/tests/expand/h020-out.jsonld @@ -0,0 +1,4 @@ +[{ + "@id": "http://example.org/base", + "http://example.com/foo": [{"@value": "bar"}] +}] \ No newline at end of file diff --git a/tests/expand/h021-in.html b/tests/expand/h021-in.html new file mode 100644 index 00000000..25b5e3cf --- /dev/null +++ b/tests/expand/h021-in.html @@ -0,0 +1,14 @@ + + + + + + \ No newline at end of file diff --git a/tests/expand/h021-out.jsonld b/tests/expand/h021-out.jsonld new file mode 100644 index 00000000..1897d265 --- /dev/null +++ b/tests/expand/h021-out.jsonld @@ -0,0 +1,4 @@ +[{ + "@id": "http://example.org/base", + "http://example.com/foo": [{"@value": "bar"}] +}] \ No 
newline at end of file From 4f58448a4cd793f0a204cf688ab46189cfa5532d Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 27 Nov 2018 16:13:33 -0800 Subject: [PATCH 2/4] Use a.example.com instead of example.org. --- tests/expand-manifest.jsonld | 6 +++--- tests/expand/h019-out.jsonld | 2 +- tests/expand/h020-in.html | 2 +- tests/expand/h020-out.jsonld | 2 +- tests/expand/h021-out.jsonld | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/expand-manifest.jsonld b/tests/expand-manifest.jsonld index b88731b7..de2bea88 100644 --- a/tests/expand-manifest.jsonld +++ b/tests/expand-manifest.jsonld @@ -1448,7 +1448,7 @@ "purpose": "Tests embedded JSON-LD in HTML", "input": "expand/h019-in.html", "expect": "expand/h019-out.jsonld", - "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} + "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"} }, { "@id": "#th020", "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], @@ -1456,7 +1456,7 @@ "purpose": "Tests embedded JSON-LD in HTML", "input": "expand/h020-in.html", "expect": "expand/h020-out.jsonld", - "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} + "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"} }, { "@id": "#th021", "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], @@ -1464,7 +1464,7 @@ "purpose": "Tests embedded JSON-LD in HTML", "input": "expand/h021-in.html", "expect": "expand/h021-out.jsonld", - "option": {"specVersion": "json-ld-1.1", "base": "http://example.org/doc"} + "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"} }, { "@id": "#tm001", "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], diff --git a/tests/expand/h019-out.jsonld b/tests/expand/h019-out.jsonld index 98a93903..1617d64f 100644 --- a/tests/expand/h019-out.jsonld +++ b/tests/expand/h019-out.jsonld @@ -1,4 +1,4 @@ [{ - "@id": "http://example.org/doc", + "@id": 
"http://a.example.com/doc", "http://example.com/foo": [{"@value": "bar"}] }] \ No newline at end of file diff --git a/tests/expand/h020-in.html b/tests/expand/h020-in.html index 134b41b0..c7fdfb1b 100644 --- a/tests/expand/h020-in.html +++ b/tests/expand/h020-in.html @@ -1,6 +1,6 @@ - + + + + \ No newline at end of file diff --git a/tests/expand/h022-out.jsonld b/tests/expand/h022-out.jsonld new file mode 100644 index 00000000..bbfd82eb --- /dev/null +++ b/tests/expand/h022-out.jsonld @@ -0,0 +1,4 @@ +[{ + "@id": "http://a.example.com/base", + "http://example.com/bar": [{"@value": "foo"}] +}] \ No newline at end of file From b969d200d5e399396d4089f1d41dccd0b3c43ab2 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 28 Nov 2018 16:24:46 -0800 Subject: [PATCH 4/4] Remove support for embedded JSON-LD inside XML comments and the use of script escape sequences. This supports w3c/json-ld-syntax#97. --- common/extract-examples.rb | 38 ++++++++++++------------------------ index.html | 27 +------------------------ tests/expand-manifest.jsonld | 16 --------------- 3 files changed, 14 insertions(+), 67 deletions(-) diff --git a/common/extract-examples.rb b/common/extract-examples.rb index 5035f657..2f9d2bfd 100755 --- a/common/extract-examples.rb +++ b/common/extract-examples.rb @@ -15,6 +15,7 @@ require 'fileutils' require 'colorize' require 'yaml' +require 'cgi' PREFIXES = { dc: "http://purl.org/dc/terms/", @@ -49,8 +50,8 @@ # Remove highlighting and commented out sections def justify(str) str = str. - sub(/^\s*\s*$/, ''). + gsub(/^\s*\s*$/, ''). gsub('****', ''). 
gsub(/####([^#]*)####/, '') @@ -222,7 +223,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) examples[title] = { title: title, filename: fn, - content: content, + content: content.to_s.gsub(/^\s*< !\s*-\s*-/, ''), content_type: element.attr('data-content-type'), number: example_number, ext: ext, @@ -302,6 +303,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) # Perform example syntactic validation based on extension case ex[:ext] when 'json', 'jsonld', 'jsonldf' + content = CGI.unescapeHTML(content) begin ::JSON.parse(content) rescue JSON::ParserError => exception @@ -325,22 +327,16 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) ex[:base] = html_base.to_s if html_base script_content = doc.at_xpath(xpath) - if script_content - # Remove (faked) XML comments and unescape sequences - content = script_content - .inner_html - .sub(/^\s*< !\s*-\s*-/, '') - .sub(/-\s*- >\s*$/, '') - .gsub(/</, '<') - end - + + # Remove (faked) XML comments and unescape sequences + content = CGI.unescapeHTML(script_content.inner_html) if script_content rescue Nokogiri::XML::SyntaxError => exception errors << "Example #{ex[:number]} at line #{ex[:line]} parse error: #{exception.message}" $stdout.write "F".colorize(:red) next end when 'table' - # already in parsed form + content = Nokogiri::HTML.parse(content) when 'ttl', 'trig' begin reader_errors = [] @@ -443,10 +439,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) # Set argument to referenced content to be parsed args[0] = if examples[ex[:result_for]][:ext] == 'html' && method == :expand # If we are expanding, and the reference is HTML, find the first script element. 
- doc = Nokogiri::HTML.parse( - examples[ex[:result_for]][:content] - .sub(/^\s*< !\s*-\s*-/, '') - .sub(/-\s*- >\s*$/, '')) + doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content]) # Get base from document, if present html_base = doc.at_xpath('/html/head/base/@href') @@ -458,15 +451,10 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) $stdout.write "F".colorize(:red) next end - StringIO.new(script_content - .inner_html - .gsub(/</, '<')) + StringIO.new(CGI.unescapeHTML(script_content.inner_html)) elsif examples[ex[:result_for]][:ext] == 'html' && ex[:target] # Only use the targeted script - doc = Nokogiri::HTML.parse( - examples[ex[:result_for]][:content] - .sub(/^\s*< !\s*-\s*-/, '') - .sub(/-\s*- >\s*$/, '')) + doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content]) script_content = doc.at_xpath(xpath) unless script_content errors << "Example #{ex[:number]} at line #{ex[:line]} references example #{ex[:result_for].inspect} with no JSON-LD script element" @@ -565,7 +553,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:) $stderr.puts "expected:\n" + expected.to_trig if verbose when 'table' expected = begin - table_to_dataset(content) + table_to_dataset(content.xpath('/html/body/table')) rescue errors << "Example #{ex[:number]} at line #{ex[:line]} raised error reading table: #{$!}" RDF::Dataset.new diff --git a/index.html b/index.html index da15a726..8375adc1 100644 --- a/index.html +++ b/index.html @@ -4671,40 +4671,15 @@

    Extract Script Content Algorithm

    Overview

    -

    As a data block - may be inside a comment, and may be escaped, the algorithm extracts the JSON from any comment, - removes REVERSE SOLIDUS escapes, - and reverses HTML Character references. +

    The algorithm reverses HTML Character references.

    Algorithm

    The algorithm takes a single required input variable: source, the textContent of an HTML script element.

    -

    For the purpose of this algorithm, the following tokens are defined in [[ABNF]]:

    - -
    -  space-character = %20 ; SPACE
    -                  / %09 ; CHARACTER TABULATION (tab)
    -                  / %0A ; LINE FEED (LF)
    -                  / %0C ; FORM FEED (FF)
    -                  / %0D ; CARRIAGE RETURN (CR)
    -  comment-open    = *space-character "<!--" *space-character
    -  comment-close   = *space-character "-->" *space-character
    -      
      -
    1. If source begins with comment-open and ends with comment-close, - remove those sequences from source.
    2. -
    3. If source contains comment-open or comment-close, - an invalid script element has been detected, and processing is aborted.
    4. -
    5. For all occurances of the any of the character sequences - <\script, - <\/script, - <\!--, - or --\> - in source using a case-insenstive match, - replace the sequence with the equivalent sequence excluding the REVERSE SOLIDUS (\).
    6. For all occurances of a HTML Character reference in source, replace the sequence with the equivalent Unicode character as defined in Named character references in [[HTML52]].
    7. diff --git a/tests/expand-manifest.jsonld b/tests/expand-manifest.jsonld index 16d18c4c..5facfbd8 100644 --- a/tests/expand-manifest.jsonld +++ b/tests/expand-manifest.jsonld @@ -1353,22 +1353,6 @@ "input": "expand/h007-in.html", "expect": "expand/h007-out.jsonld", "option": {"specVersion": "json-ld-1.1", "extractAllScripts": true} - }, { - "@id": "#th008", - "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], - "name": "Expands embedded JSON-LD script element with comments", - "purpose": "Tests embedded JSON-LD in HTML with comments", - "input": "expand/h008-in.html", - "expect": "expand/h008-out.jsonld", - "option": {"specVersion": "json-ld-1.1"} - }, { - "@id": "#th009", - "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"], - "name": "Expands embedded JSON-LD script element with escaped tokens", - "purpose": "Tests embedded JSON-LD in HTML with escapes", - "input": "expand/h009-in.html", - "expect": "expand/h009-out.jsonld", - "option": {"specVersion": "json-ld-1.1"} }, { "@id": "#th010", "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],