From cf7e61fe9780497e39d0e2a1d53b7c8326ac63f1 Mon Sep 17 00:00:00 2001 From: Akshay Kumar Date: Tue, 5 Nov 2024 03:26:20 +0530 Subject: [PATCH] add option http://cyberneko.org/html/features/scanner/cdata-early-closing --- .../java/org/htmlunit/cyberneko/HTMLScanner.java | 13 ++++++++++++- .../cdata/test-cdata-close-early-disabled.html | 2 ++ .../test-cdata-close-early-disabled.html.canonical | 14 ++++++++++++++ ...ata-close-early-disabled.html.canonical-domhtml | 14 ++++++++++++++ ...t-cdata-close-early-disabled.html.canonical-frg | 8 ++++++++ ...-cdata-close-early-disabled.html.canonical-html | 2 ++ .../test-cdata-close-early-disabled.html.settings | 1 + 7 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-domhtml create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-frg create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-html create mode 100644 src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.settings diff --git a/src/main/java/org/htmlunit/cyberneko/HTMLScanner.java b/src/main/java/org/htmlunit/cyberneko/HTMLScanner.java index 3a7f428a..94e71ab7 100644 --- a/src/main/java/org/htmlunit/cyberneko/HTMLScanner.java +++ b/src/main/java/org/htmlunit/cyberneko/HTMLScanner.java @@ -25,6 +25,7 @@ import java.net.URL; import java.util.Locale; +import org.htmlunit.cyberneko.HTMLElements.Element; import org.htmlunit.cyberneko.io.PlaybackInputStream; import org.htmlunit.cyberneko.util.MiniStack; import org.htmlunit.cyberneko.xerces.util.EncodingMap; @@ -59,6 +60,7 @@ *
  • http://cyberneko.org/html/features/scanner/style/strip-comment-delims *
  • http://cyberneko.org/html/features/scanner/ignore-specified-charset *
  • http://cyberneko.org/html/features/scanner/cdata-sections + *
  • http://cyberneko.org/html/features/scanner/cdata-early-closing *
  • http://cyberneko.org/html/features/override-doctype *
  • http://cyberneko.org/html/features/insert-doctype *
  • http://cyberneko.org/html/features/parse-noscript-content @@ -165,6 +167,9 @@ public class HTMLScanner implements XMLDocumentScanner, XMLLocator, HTMLComponen /** Scan CDATA sections. */ public static final String CDATA_SECTIONS = "http://cyberneko.org/html/features/scanner/cdata-sections"; + /** '>' closes the cdata section (see html spec) */ + public static final String CDATA_EARLY_CLOSING = "http://cyberneko.org/html/features/scanner/cdata-early-closing"; + /** Override doctype declaration public and system identifiers. */ public static final String OVERRIDE_DOCTYPE = "http://cyberneko.org/html/features/override-doctype"; @@ -193,6 +198,7 @@ public class HTMLScanner implements XMLDocumentScanner, XMLLocator, HTMLComponen STYLE_STRIP_COMMENT_DELIMS, IGNORE_SPECIFIED_CHARSET, CDATA_SECTIONS, + CDATA_EARLY_CLOSING, OVERRIDE_DOCTYPE, INSERT_DOCTYPE, NORMALIZE_ATTRIBUTES, @@ -210,6 +216,7 @@ public class HTMLScanner implements XMLDocumentScanner, XMLLocator, HTMLComponen Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, + Boolean.TRUE, Boolean.FALSE, Boolean.FALSE, Boolean.FALSE, @@ -336,6 +343,9 @@ public class HTMLScanner implements XMLDocumentScanner, XMLLocator, HTMLComponen /** CDATA sections. */ boolean fCDATASections_; + /** CDATA early closing. */ + boolean fCDATAEarlyClosing_; + /** Override doctype declaration public and system identifiers. */ private boolean fOverrideDoctype_; @@ -676,6 +686,7 @@ public void reset(final XMLComponentManager manager) throws XMLConfigurationExce fStyleStripCommentDelims_ = manager.getFeature(STYLE_STRIP_COMMENT_DELIMS); fIgnoreSpecifiedCharset_ = manager.getFeature(IGNORE_SPECIFIED_CHARSET); fCDATASections_ = manager.getFeature(CDATA_SECTIONS); + fCDATAEarlyClosing_ = manager.getFeature(CDATA_EARLY_CLOSING); fOverrideDoctype_ = manager.getFeature(OVERRIDE_DOCTYPE); fInsertDoctype_ = manager.getFeature(INSERT_DOCTYPE); fNormalizeAttributes_ = manager.getFeature(NORMALIZE_ATTRIBUTES); @@ -2655,7 +2666,7 @@ protected boolean scanCDataContent(final XMLString xmlString) throws IOException } break; } - else if (c == '>') { + else if (fCDATAEarlyClosing_&& c == '>') { // don't add the ]] to the buffer return false; } diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html new file mode 100644 index 00000000..c3e3088b --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html @@ -0,0 +1,2 @@ +

    P1]]> +

    P2 \ No newline at end of file diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical new file mode 100644 index 00000000..61534e3b --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical @@ -0,0 +1,14 @@ +(HTML +(head +)head +(BODY +(p +"P1 +#[CDATA[]] +"\n +)p +(p +"P2 +)p +)BODY +)HTML \ No newline at end of file diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-domhtml b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-domhtml new file mode 100644 index 00000000..03b3c060 --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-domhtml @@ -0,0 +1,14 @@ +(HTML +(HEAD +)HEAD +(BODY +(P +"P1 +#[CDATA[]] +"\n +)P +(P +"P2 +)P +)BODY +)HTML \ No newline at end of file diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-frg b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-frg new file mode 100644 index 00000000..62ee556d --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-frg @@ -0,0 +1,8 @@ +(p +"P1 +#[CDATA[]] +"\n +)p +(p +"P2 +)p \ No newline at end of file diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-html b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-html new file mode 100644 index 00000000..ce450d28 --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.canonical-html @@ -0,0 +1,2 @@ +

    P1 +

    P2

    \ No newline at end of file diff --git a/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.settings b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.settings new file mode 100644 index 00000000..a7470d0d --- /dev/null +++ b/src/test/resources/org/htmlunit/cyberneko/testfiles/cdata/test-cdata-close-early-disabled.html.settings @@ -0,0 +1 @@ +feature http://cyberneko.org/html/features/scanner/cdata-early-closing false \ No newline at end of file