Skip to content

Commit 57e7c9c

Browse files
committed
Remove support for embedded JSON-LD inside XML comments and the use of script escape sequences. This supports w3c/json-ld-syntax#97.
1 parent eff3e26 commit 57e7c9c

File tree

3 files changed

+14
-67
lines changed

3 files changed

+14
-67
lines changed

common/extract-examples.rb

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
require 'fileutils'
1616
require 'colorize'
1717
require 'yaml'
18+
require 'cgi'
1819

1920
PREFIXES = {
2021
dc: "http://purl.org/dc/terms/",
@@ -49,8 +50,8 @@
4950
# Remove highlighting and commented out sections
5051
def justify(str)
5152
str = str.
52-
sub(/^\s*<!--\s*$/, '').
53-
sub(/^\s*-->\s*$/, '').
53+
gsub(/^\s*<!--\s*$/, '').
54+
gsub(/^\s*-->\s*$/, '').
5455
gsub('****', '').
5556
gsub(/####([^#]*)####/, '')
5657

@@ -222,7 +223,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
222223
examples[title] = {
223224
title: title,
224225
filename: fn,
225-
content: content,
226+
content: content.to_s.gsub(/^\s*< !\s*-\s*-/, '<!--').gsub(/-\s*- >/, '-->'),
226227
content_type: element.attr('data-content-type'),
227228
number: example_number,
228229
ext: ext,
@@ -302,6 +303,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
302303
# Perform example syntactic validation based on extension
303304
case ex[:ext]
304305
when 'json', 'jsonld', 'jsonldf'
306+
content = CGI.unescapeHTML(content)
305307
begin
306308
::JSON.parse(content)
307309
rescue JSON::ParserError => exception
@@ -325,22 +327,16 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
325327
ex[:base] = html_base.to_s if html_base
326328

327329
script_content = doc.at_xpath(xpath)
328-
if script_content
329-
# Remove (faked) XML comments and unescape sequences
330-
content = script_content
331-
.inner_html
332-
.sub(/^\s*< !\s*-\s*-/, '')
333-
.sub(/-\s*- >\s*$/, '')
334-
.gsub(/&lt;/, '<')
335-
end
336-
330+
331+
# Unescape HTML character references in the script content
332+
content = CGI.unescapeHTML(script_content.inner_html) if script_content
337333
rescue Nokogiri::XML::SyntaxError => exception
338334
errors << "Example #{ex[:number]} at line #{ex[:line]} parse error: #{exception.message}"
339335
$stdout.write "F".colorize(:red)
340336
next
341337
end
342338
when 'table'
343-
# already in parsed form
339+
content = Nokogiri::HTML.parse(content)
344340
when 'ttl', 'trig'
345341
begin
346342
reader_errors = []
@@ -443,10 +439,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
443439
# Set argument to referenced content to be parsed
444440
args[0] = if examples[ex[:result_for]][:ext] == 'html' && method == :expand
445441
# If we are expanding, and the reference is HTML, find the first script element.
446-
doc = Nokogiri::HTML.parse(
447-
examples[ex[:result_for]][:content]
448-
.sub(/^\s*< !\s*-\s*-/, '')
449-
.sub(/-\s*- >\s*$/, ''))
442+
doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content])
450443

451444
# Get base from document, if present
452445
html_base = doc.at_xpath('/html/head/base/@href')
@@ -458,15 +451,10 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
458451
$stdout.write "F".colorize(:red)
459452
next
460453
end
461-
StringIO.new(script_content
462-
.inner_html
463-
.gsub(/&lt;/, '<'))
454+
StringIO.new(CGI.unescapeHTML(script_content.inner_html))
464455
elsif examples[ex[:result_for]][:ext] == 'html' && ex[:target]
465456
# Only use the targeted script
466-
doc = Nokogiri::HTML.parse(
467-
examples[ex[:result_for]][:content]
468-
.sub(/^\s*< !\s*-\s*-/, '')
469-
.sub(/-\s*- >\s*$/, ''))
457+
doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content])
470458
script_content = doc.at_xpath(xpath)
471459
unless script_content
472460
errors << "Example #{ex[:number]} at line #{ex[:line]} references example #{ex[:result_for].inspect} with no JSON-LD script element"
@@ -565,7 +553,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
565553
$stderr.puts "expected:\n" + expected.to_trig if verbose
566554
when 'table'
567555
expected = begin
568-
table_to_dataset(content)
556+
table_to_dataset(content.xpath('/html/body/table'))
569557
rescue
570558
errors << "Example #{ex[:number]} at line #{ex[:line]} raised error reading table: #{$!}"
571559
RDF::Dataset.new

index.html

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4671,40 +4671,15 @@ <h3>Extract Script Content Algorithm</h3>
46714671

46724672
<section class="informative">
46734673
<h4>Overview</h4>
4674-
<p>As a <a data-cite="HTML52/semantics-scripting.html#data-block">data block</a>
4675-
may be inside a comment, and may be escaped, the algorithm extracts the JSON from any comment,
4676-
removes REVERSE SOLIDUS escapes,
4677-
and reverses <a data-cite="HTML5/syntax.html#character-references">HTML Character references</a>.
4674+
<p>The algorithm reverses <a data-cite="HTML5/syntax.html#character-references">HTML Character references</a>.</p>
46784675
</section>
46794676

46804677
<section>
46814678
<h4>Algorithm</h4>
46824679
<p>The algorithm takes a single required input variable: <var>source</var>,
46834680
the <a data-cite="DOM#dom-node-textcontent">textContent</a> of an HTML <a data-cite="HTML52/semantics-scripting.html#the-script-element">script element</a>.</p>
4684-
<p>For the purpose of this algorithm, the following tokens are defined in [[ABNF]]:</p>
4685-
4686-
<pre class="nohighlight">
4687-
<dfn>space-character</dfn> = %20 ; SPACE
4688-
/ %09 ; CHARACTER TABULATION (tab)
4689-
/ %0A ; LINE FEED (LF)
4690-
/ %0C ; FORM FEED (FF)
4691-
/ %0D ; CARRIAGE RETURN (CR)
4692-
<dfn>comment-open</dfn> = *<a>space-character</a> <code>"&lt;!--"</code> *<a>space-character</a>
4693-
<dfn>comment-close</dfn> = *<a>space-character</a> <code>"--&gt;"</code> *<a>space-character</a>
4694-
</pre>
46954681

46964682
<ol>
4697-
<li>If <var>source</var> begins with <a>comment-open</a> and ends with <a>comment-close</a>,
4698-
remove those sequences from <var>source</var>.</li>
4699-
<li>If <var>source</var> contains <a>comment-open</a> or <a>comment-close</a>,
4700-
an <a data-link-for="JsonLdErrorCode">invalid script element</a> has been detected, and processing is aborted.</li>
4701-
<li>For all occurances of the any of the character sequences
4702-
<code>&lt;\script</code>,
4703-
<code>&lt;\/script</code>,
4704-
<code>&lt;\!--</code>,
4705-
or <code>--\&gt;</code>
4706-
in <var>source</var> using a case-insenstive match,
4707-
replace the sequence with the equivalent sequence excluding the REVERSE SOLIDUS (<code>\</code>).</li>
47084683
<li>For all occurrences of an <a data-cite="HTML5/syntax.html#character-references">HTML Character reference</a> in <var>source</var>,
47094684
replace the sequence with the equivalent Unicode character as defined
47104685
in <a data-cite="HTML52/syntax.html#named-character-references">Named character references</a> in [[HTML52]].</li>

tests/expand-manifest.jsonld

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,22 +1353,6 @@
13531353
"input": "expand/h007-in.html",
13541354
"expect": "expand/h007-out.jsonld",
13551355
"option": {"specVersion": "json-ld-1.1", "extractAllScripts": true}
1356-
}, {
1357-
"@id": "#th008",
1358-
"@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
1359-
"name": "Expands embedded JSON-LD script element with comments",
1360-
"purpose": "Tests embedded JSON-LD in HTML with comments",
1361-
"input": "expand/h008-in.html",
1362-
"expect": "expand/h008-out.jsonld",
1363-
"option": {"specVersion": "json-ld-1.1"}
1364-
}, {
1365-
"@id": "#th009",
1366-
"@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
1367-
"name": "Expands embedded JSON-LD script element with escaped tokens",
1368-
"purpose": "Tests embedded JSON-LD in HTML with escapes",
1369-
"input": "expand/h009-in.html",
1370-
"expect": "expand/h009-out.jsonld",
1371-
"option": {"specVersion": "json-ld-1.1"}
13721356
}, {
13731357
"@id": "#th010",
13741358
"@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],

0 commit comments

Comments
 (0)