From 4d042a7c500ffefa99b7aa7e172fe3cdcf1c7594 Mon Sep 17 00:00:00 2001 From: Thiemo Kreuz Date: Tue, 12 Feb 2019 14:04:43 +0100 Subject: [PATCH] Fix regexp not removing multi-line comments The previous regular expression had two issues: 1. It was only able to remove 1-line comments. 2. It was not set to be ungreedy, and could potentially remove content between two comments. E.g. when something like `<!-- comment --> content <!-- comment -->` does not contain a newline character, the content would actually be removed. We ran into this issue at https://gerrit.wikimedia.org/r/489323. As a temporary workaround, we made all our comments 1-line comments, and made sure each comment is on a separate line. --- index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 584d278..2c9ec2f 100644 --- a/index.js +++ b/index.js @@ -12,10 +12,10 @@ var regexSequences = [ // Remove XML stuffs and comments [/<\?xml[\s\S]*?>/gi, ""], [//gi, ""], - [//gi, ""], + [//g, ""], // SVG XML -> HTML5 - [/\<([A-Za-z]+)([^\>]*)\/\>/g, "<$1$2>"], // convert self-closing XML SVG nodes to explicitly closed HTML5 SVG nodes + [/\<([a-z]+)([^\>]*)\/\>/gi, "<$1$2>"], // convert self-closing XML SVG nodes to explicitly closed HTML5 SVG nodes [/\s+/g, " "], // replace whitespace sequences with a single space [/\> \<"] // remove whitespace between tags ];