Skip to content

Commit aa312aa

Browse files
davidben authored and joshgoebel committed
cpp: Fix highlighting of unterminated raw strings
PR highlightjs#1897 switched C++ raw strings to use backreferences, however this breaks source files where raw strings are truncated. Like comments, it would be preferable to highlight them. Instead, go back to using separate begin and end regexps, but introduce an endFilter feature to filter out false positive matches. This internally works similarly to endSameAsBegin. See also issue highlightjs#2259.
1 parent 8c248fd commit aa312aa

7 files changed

+54
-7
lines changed

docs/reference.rst

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ endSameAsBegin
190190
Acts as ``end`` matching exactly the same string that was found by the
191191
corresponding ``begin`` regexp.
192192

193-
For example, in PostgreSQL string constants can uee "dollar quotes",
193+
For example, in PostgreSQL string constants can use "dollar quotes",
194194
consisting of a dollar sign, an optional tag of zero or more characters,
195195
and another dollar sign. String constant must be ended with the same
196196
construct using the same tag. It is possible to nest dollar-quoted string
@@ -208,6 +208,26 @@ In this case you can't simply specify the same regexp for ``begin`` and
208208
``end`` (say, ``"\\$[a-z]\\$"``), but you can use ``begin: "\\$[a-z]\\$"``
209209
and ``endSameAsBegin: true``.
210210

211+
.. _endFilter:
212+
213+
endFilter
214+
^^^^^^^^^
215+
216+
**type**: function
217+
218+
Filters ``end`` matches to implement end rules that cannot be expressed as a
219+
standalone regular expression.
220+
221+
This should be a function which takes two string parameters, the string that
222+
matched the ``begin`` regexp and the string that matched the ``end`` regexp. It
223+
should return true to end the mode and false otherwise.
224+
225+
For example, C++11 raw string constants use syntax like ``R"tag(.....)tag"``,
226+
where ``tag`` is any zero to sixteen character string that must be repeated at
227+
the end. This could be matched with a single regexp containing backreferences,
228+
but truncated raw strings would not highlight. Instead, ``endFilter`` can be
229+
used to reject ``)tag"`` delimiters which do not match the starting value.
230+
211231
.. _lexemes:
212232

213233
lexemes

src/highlight.js

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,19 @@ const HLJS = function(hljs) {
119119
function _highlight(languageName, code, ignore_illegals, continuation) {
120120
var codeToHighlight = code;
121121

122-
function endOfMode(mode, lexeme) {
123-
if (regex.startsWith(mode.endRe, lexeme)) {
122+
function endOfMode(mode, matchPlusRemainder, lexeme) {
123+
var modeEnded = regex.startsWith(mode.endRe, matchPlusRemainder);
124+
if (modeEnded && mode.endFilter) {
125+
modeEnded = mode.endFilter(mode.beginValue, lexeme);
126+
}
127+
if (modeEnded) {
124128
while (mode.endsParent && mode.parent) {
125129
mode = mode.parent;
126130
}
127131
return mode;
128132
}
129133
if (mode.endsWithParent) {
130-
return endOfMode(mode.parent, lexeme);
134+
return endOfMode(mode.parent, matchPlusRemainder, lexeme);
131135
}
132136
}
133137

@@ -206,7 +210,7 @@ const HLJS = function(hljs) {
206210
if (mode.className) {
207211
emitter.openNode(mode.className)
208212
}
209-
top = Object.create(mode, {parent: {value: top}});
213+
top = Object.create(mode, {parent: {value: top}, beginValue: {value: lexeme}});
210214
}
211215

212216
function doIgnore(lexeme) {
@@ -255,7 +259,7 @@ const HLJS = function(hljs) {
255259
function doEndMatch(match) {
256260
var lexeme = match[0];
257261
var matchPlusRemainder = codeToHighlight.substr(match.index);
258-
var end_mode = endOfMode(top, matchPlusRemainder);
262+
var end_mode = endOfMode(top, matchPlusRemainder, lexeme);
259263
if (!end_mode) { return; }
260264

261265
var origin = top;

src/languages/c-like.js

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,16 @@ export default function(hljs) {
4444
begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'',
4545
illegal: '.'
4646
},
47-
{ begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\((?:.|\n)*?\)\1"/ }
47+
{
48+
begin: /(?:u8?|U|L)?R"[^()\\ ]{0,16}\(/,
49+
end: /\)[^()\\ ]{0,16}"/,
50+
endFilter: function(begin, end) {
51+
var quote = begin.indexOf('"');
52+
var beginDelimiter = begin.substring(quote + 1, begin.length - 1);
53+
var endDelimiter = end.substring(1, end.length - 1);
54+
return beginDelimiter == endDelimiter;
55+
},
56+
}
4857
]
4958
};
5059

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<span class="hljs-comment">/*
2+
Truncated block comment
3+
</span>
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/*
2+
Truncated block comment
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<span class="hljs-string">R"foo(
2+
Truncated raw string
3+
)nope"
4+
Still not completed.
5+
</span>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
R"foo(
2+
Truncated raw string
3+
)nope"
4+
Still not completed.

0 commit comments

Comments
 (0)