diff --git a/common/extract-examples.rb b/common/extract-examples.rb
index 5035f657..2f9d2bfd 100755
--- a/common/extract-examples.rb
+++ b/common/extract-examples.rb
@@ -15,6 +15,7 @@
require 'fileutils'
require 'colorize'
require 'yaml'
+require 'cgi'
PREFIXES = {
dc: "http://purl.org/dc/terms/",
@@ -49,8 +50,8 @@
# Remove highlighting and commented out sections
def justify(str)
str = str.
- sub(/^\s*\s*$/, '').
+ gsub(/^\s*\s*$/, '').
gsub('****', '').
gsub(/####([^#]*)####/, '')
@@ -222,7 +223,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
examples[title] = {
title: title,
filename: fn,
- content: content,
+ content: content.to_s.gsub(/^\s*< !\s*-\s*-/, ''),
content_type: element.attr('data-content-type'),
number: example_number,
ext: ext,
@@ -302,6 +303,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
# Perform example syntactic validation based on extension
case ex[:ext]
when 'json', 'jsonld', 'jsonldf'
+ content = CGI.unescapeHTML(content)
begin
::JSON.parse(content)
rescue JSON::ParserError => exception
@@ -325,22 +327,16 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
ex[:base] = html_base.to_s if html_base
script_content = doc.at_xpath(xpath)
- if script_content
- # Remove (faked) XML comments and unescape sequences
- content = script_content
- .inner_html
- .sub(/^\s*< !\s*-\s*-/, '')
- .sub(/-\s*- >\s*$/, '')
- .gsub(/</, '<')
- end
-
+
+ # Unescape HTML character references in the script element content
+ content = CGI.unescapeHTML(script_content.inner_html) if script_content
rescue Nokogiri::XML::SyntaxError => exception
errors << "Example #{ex[:number]} at line #{ex[:line]} parse error: #{exception.message}"
$stdout.write "F".colorize(:red)
next
end
when 'table'
- # already in parsed form
+ content = Nokogiri::HTML.parse(content)
when 'ttl', 'trig'
begin
reader_errors = []
@@ -443,10 +439,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
# Set argument to referenced content to be parsed
args[0] = if examples[ex[:result_for]][:ext] == 'html' && method == :expand
# If we are expanding, and the reference is HTML, find the first script element.
- doc = Nokogiri::HTML.parse(
- examples[ex[:result_for]][:content]
- .sub(/^\s*< !\s*-\s*-/, '')
- .sub(/-\s*- >\s*$/, ''))
+ doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content])
# Get base from document, if present
html_base = doc.at_xpath('/html/head/base/@href')
@@ -458,15 +451,10 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
$stdout.write "F".colorize(:red)
next
end
- StringIO.new(script_content
- .inner_html
- .gsub(/</, '<'))
+ StringIO.new(CGI.unescapeHTML(script_content.inner_html))
elsif examples[ex[:result_for]][:ext] == 'html' && ex[:target]
# Only use the targeted script
- doc = Nokogiri::HTML.parse(
- examples[ex[:result_for]][:content]
- .sub(/^\s*< !\s*-\s*-/, '')
- .sub(/-\s*- >\s*$/, ''))
+ doc = Nokogiri::HTML.parse(examples[ex[:result_for]][:content])
script_content = doc.at_xpath(xpath)
unless script_content
errors << "Example #{ex[:number]} at line #{ex[:line]} references example #{ex[:result_for].inspect} with no JSON-LD script element"
@@ -565,7 +553,7 @@ def save_example(examples:, element:, title:, example_number:, error:, warn:)
$stderr.puts "expected:\n" + expected.to_trig if verbose
when 'table'
expected = begin
- table_to_dataset(content)
+ table_to_dataset(content.xpath('/html/body/table'))
rescue
errors << "Example #{ex[:number]} at line #{ex[:line]} raised error reading table: #{$!}"
RDF::Dataset.new
diff --git a/index.html b/index.html
index 8d0d8064..8375adc1 100644
--- a/index.html
+++ b/index.html
@@ -4671,40 +4671,15 @@
Extract Script Content Algorithm
Algorithm
The algorithm takes a single required input variable: source,
the textContent of an HTML script element.
- For the purpose of this algorithm, the following tokens are defined in [[ABNF]]:
-
-
- space-character = %20 ; SPACE
- / %09 ; CHARACTER TABULATION (tab)
- / %0A ; LINE FEED (LF)
- / %0C ; FORM FEED (FF)
- / %0D ; CARRIAGE RETURN (CR)
- comment-open = *space-character "<!--"
*space-character
- comment-close = *space-character "-->"
*space-character
-
- - If source begins with comment-open and ends with comment-close,
- remove those sequences from source.
- - If source contains comment-open or comment-close,
- an invalid script element has been detected, and processing is aborted.
- - For all occurances of the any of the character sequences
-
<\script
,
- <\/script
,
- <\!--
,
- or --\>
- in source using a case-insenstive match,
- replace the sequence with the equivalent sequence excluding the REVERSE SOLIDUS (\
).
- For all occurances of a HTML Character reference in source,
replace the sequence with the equivalent Unicode character as defined
in Named character references in [[HTML52]].
@@ -4866,6 +4841,14 @@ The JsonLdProcessor Interface
a string representing the IRI of a remote document,
extract the content of the JSON-LD script element(s) into original input:
+ - Set base IRI to the Document Base URL
+ of original input, as defined in [[HTML52]],
+ using the existing base IRI as the document's URL.
+
+ The use of the
Document Base URL
+ from [[HTML52]] for setting the
base IRI of the enclosed JSON-LD
+ is an experimental feature, which may be changed in a future version of this specification.
+
- If the original passed input parameter
contains a fragment identifier,
set source to the textContent
diff --git a/tests/expand-manifest.jsonld b/tests/expand-manifest.jsonld
index dde9c18d..5facfbd8 100644
--- a/tests/expand-manifest.jsonld
+++ b/tests/expand-manifest.jsonld
@@ -1353,22 +1353,6 @@
"input": "expand/h007-in.html",
"expect": "expand/h007-out.jsonld",
"option": {"specVersion": "json-ld-1.1", "extractAllScripts": true}
- }, {
- "@id": "#th008",
- "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
- "name": "Expands embedded JSON-LD script element with comments",
- "purpose": "Tests embedded JSON-LD in HTML with comments",
- "input": "expand/h008-in.html",
- "expect": "expand/h008-out.jsonld",
- "option": {"specVersion": "json-ld-1.1"}
- }, {
- "@id": "#th009",
- "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
- "name": "Expands embedded JSON-LD script element with escaped tokens",
- "purpose": "Tests embedded JSON-LD in HTML with escapes",
- "input": "expand/h009-in.html",
- "expect": "expand/h009-out.jsonld",
- "option": {"specVersion": "json-ld-1.1"}
}, {
"@id": "#th010",
"@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
@@ -1433,6 +1417,46 @@
"input": "expand/h017-in.html",
"expect": "invalid script element",
"option": {"specVersion": "json-ld-1.1"}
+ }, {
+ "@id": "#th018",
+ "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
+ "name": "Expands embedded JSON-LD script element relative to document base",
+ "purpose": "Tests embedded JSON-LD in HTML",
+ "input": "expand/h018-in.html",
+ "expect": "expand/h018-out.jsonld",
+ "option": {"specVersion": "json-ld-1.1"}
+ }, {
+ "@id": "#th019",
+ "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
+ "name": "Expands embedded JSON-LD script element relative to base option",
+ "purpose": "Tests embedded JSON-LD in HTML",
+ "input": "expand/h019-in.html",
+ "expect": "expand/h019-out.jsonld",
+ "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"}
+ }, {
+ "@id": "#th020",
+ "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
+ "name": "Expands embedded JSON-LD script element relative to HTML base",
+ "purpose": "Tests embedded JSON-LD in HTML",
+ "input": "expand/h020-in.html",
+ "expect": "expand/h020-out.jsonld",
+ "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"}
+ }, {
+ "@id": "#th021",
+ "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
+ "name": "Expands embedded JSON-LD script element relative to relative HTML base",
+ "purpose": "Tests embedded JSON-LD in HTML",
+ "input": "expand/h021-in.html",
+ "expect": "expand/h021-out.jsonld",
+ "option": {"specVersion": "json-ld-1.1", "base": "http://a.example.com/doc"}
+ }, {
+ "@id": "#th022",
+ "@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
+ "name": "Expands targeted JSON-LD script element with fragment and HTML base",
+ "purpose": "Tests embedded JSON-LD in HTML with fragment identifier",
+ "input": "expand/h022-in.html#second",
+ "expect": "expand/h022-out.jsonld",
+ "option": {"specVersion": "json-ld-1.1"}
}, {
"@id": "#tm001",
"@type": ["jld:PositiveEvaluationTest", "jld:ExpandTest"],
diff --git a/tests/expand/h018-in.html b/tests/expand/h018-in.html
new file mode 100644
index 00000000..8a644238
--- /dev/null
+++ b/tests/expand/h018-in.html
@@ -0,0 +1,13 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/expand/h018-out.jsonld b/tests/expand/h018-out.jsonld
new file mode 100644
index 00000000..7bf2e224
--- /dev/null
+++ b/tests/expand/h018-out.jsonld
@@ -0,0 +1,4 @@
+[{
+ "@id": "https://w3c.github.io/json-ld-api/tests/expand/h018-in.html",
+ "http://example.com/foo": [{"@value": "bar"}]
+}]
\ No newline at end of file
diff --git a/tests/expand/h019-in.html b/tests/expand/h019-in.html
new file mode 100644
index 00000000..8a644238
--- /dev/null
+++ b/tests/expand/h019-in.html
@@ -0,0 +1,13 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/expand/h019-out.jsonld b/tests/expand/h019-out.jsonld
new file mode 100644
index 00000000..1617d64f
--- /dev/null
+++ b/tests/expand/h019-out.jsonld
@@ -0,0 +1,4 @@
+[{
+ "@id": "http://a.example.com/doc",
+ "http://example.com/foo": [{"@value": "bar"}]
+}]
\ No newline at end of file
diff --git a/tests/expand/h020-in.html b/tests/expand/h020-in.html
new file mode 100644
index 00000000..c7fdfb1b
--- /dev/null
+++ b/tests/expand/h020-in.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/expand/h020-out.jsonld b/tests/expand/h020-out.jsonld
new file mode 100644
index 00000000..f309cd84
--- /dev/null
+++ b/tests/expand/h020-out.jsonld
@@ -0,0 +1,4 @@
+[{
+ "@id": "http://a.example.com/base",
+ "http://example.com/foo": [{"@value": "bar"}]
+}]
\ No newline at end of file
diff --git a/tests/expand/h021-in.html b/tests/expand/h021-in.html
new file mode 100644
index 00000000..25b5e3cf
--- /dev/null
+++ b/tests/expand/h021-in.html
@@ -0,0 +1,14 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/expand/h021-out.jsonld b/tests/expand/h021-out.jsonld
new file mode 100644
index 00000000..f309cd84
--- /dev/null
+++ b/tests/expand/h021-out.jsonld
@@ -0,0 +1,4 @@
+[{
+ "@id": "http://a.example.com/base",
+ "http://example.com/foo": [{"@value": "bar"}]
+}]
\ No newline at end of file
diff --git a/tests/expand/h022-in.html b/tests/expand/h022-in.html
new file mode 100644
index 00000000..0d18ab9d
--- /dev/null
+++ b/tests/expand/h022-in.html
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/expand/h022-out.jsonld b/tests/expand/h022-out.jsonld
new file mode 100644
index 00000000..bbfd82eb
--- /dev/null
+++ b/tests/expand/h022-out.jsonld
@@ -0,0 +1,4 @@
+[{
+ "@id": "http://a.example.com/base",
+ "http://example.com/bar": [{"@value": "foo"}]
+}]
\ No newline at end of file