Skip to content

Commit 58d9113

Browse files
davidbenjoshgoebel
andcommitted
enh(cpp): Improve highlighting of unterminated raw strings
PR #1897 switched C++ raw strings to use backreferences, however this breaks source files where raw strings are truncated. Like comments, it would be preferable to highlight them. - Add `on:begin` and `on:end` to allow more granular matching when the end match is dynamic and based on a part of the begin match - This deprecates the `endSameAsBegin` attribute. That attribute was a very specific way to solve this problem, but now we have a much more general solution in these added callbacks. Also related: #2259. Co-authored-by: Josh Goebel <[email protected]>
1 parent 0afd0d3 commit 58d9113

File tree

9 files changed

+82
-27
lines changed

9 files changed

+82
-27
lines changed

docs/reference.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ In this case you can't simply specify the same regexp for ``begin`` and
208208
``end`` (say, ``"\\$[a-z]\\$"``), but you can use ``begin: "\\$[a-z]\\$"``
209209
and ``endSameAsBegin: true``.
210210

211+
211212
.. _lexemes:
212213

213214
lexemes

src/highlight.js

Lines changed: 44 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ https://highlightjs.org/
44
*/
55

66
import deepFreeze from './vendor/deep_freeze';
7+
import Response from './lib/response';
78
import TokenTreeEmitter from './lib/token_tree';
89
import * as regex from './lib/regex';
910
import * as utils from './lib/utils';
@@ -118,18 +119,6 @@ const HLJS = function(hljs) {
118119
function _highlight(languageName, code, ignoreIllegals, continuation) {
119120
var codeToHighlight = code;
120121

121-
function endOfMode(mode, lexeme) {
122-
if (regex.startsWith(mode.endRe, lexeme)) {
123-
while (mode.endsParent && mode.parent) {
124-
mode = mode.parent;
125-
}
126-
return mode;
127-
}
128-
if (mode.endsWithParent) {
129-
return endOfMode(mode.parent, lexeme);
130-
}
131-
}
132-
133122
function keywordData(mode, match) {
134123
var matchText = language.case_insensitive ? match[0].toLowerCase() : match[0];
135124
return Object.prototype.hasOwnProperty.call(mode.keywords, matchText) && mode.keywords[matchText];
@@ -206,7 +195,33 @@ const HLJS = function(hljs) {
206195
if (mode.className) {
207196
emitter.openNode(mode.className);
208197
}
209-
top = Object.create(mode, { parent: { value: top } });
198+
top = Object.create(mode, {parent: {value: top}});
199+
return top;
200+
}
201+
202+
function endOfMode(mode, match, matchPlusRemainder) {
203+
let matched = regex.startsWith(mode.endRe, matchPlusRemainder);
204+
205+
if (matched) {
206+
if (mode["before:end"]) {
207+
let resp = new Response(mode);
208+
mode["before:end"](match, resp);
209+
if (resp.ignore)
210+
matched = false;
211+
}
212+
213+
if (matched) {
214+
while (mode.endsParent && mode.parent) {
215+
mode = mode.parent;
216+
}
217+
return mode;
218+
}
219+
}
220+
// even if before:end fires an `ignore` it's still possible
221+
// that we might trigger the end node because of a parent mode
222+
if (mode.endsWithParent) {
223+
return endOfMode(mode.parent, match, matchPlusRemainder);
224+
}
210225
}
211226

212227
function doIgnore(lexeme) {
@@ -226,12 +241,15 @@ const HLJS = function(hljs) {
226241
function doBeginMatch(match) {
227242
var lexeme = match[0];
228243
var new_mode = match.rule;
229-
230-
if (new_mode.__onBegin) {
231-
const res = new_mode.__onBegin(match) || {};
232-
if (res.ignoreMatch) {
233-
return doIgnore(lexeme);
234-
}
244+
var mode;
245+
246+
let resp = new Response(new_mode);
247+
// first internal before callbacks, then the public ones
248+
let beforeCallbacks = [new_mode.__beforeBegin, new_mode["before:begin"]];
249+
for (let cb of beforeCallbacks) {
250+
if (!cb) continue;
251+
cb(match, resp);
252+
if (resp.ignore) return doIgnore(lexeme);
235253
}
236254

237255
if (new_mode && new_mode.endSameAsBegin) {
@@ -249,14 +267,19 @@ const HLJS = function(hljs) {
249267
mode_buffer = lexeme;
250268
}
251269
}
252-
startNewMode(new_mode);
270+
mode = startNewMode(new_mode);
271+
if (mode["after:begin"]) {
272+
let resp = new Response(mode);
273+
mode["after:begin"](match, resp);
274+
}
253275
return new_mode.returnBegin ? 0 : lexeme.length;
254276
}
255277

256278
function doEndMatch(match) {
257279
var lexeme = match[0];
258280
var matchPlusRemainder = codeToHighlight.substr(match.index);
259-
var end_mode = endOfMode(top, matchPlusRemainder);
281+
282+
var end_mode = endOfMode(top, match, matchPlusRemainder);
260283
if (!end_mode) { return NO_MATCH; }
261284

262285
var origin = top;

src/languages/c-like.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,12 @@ export default function(hljs) {
4444
begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'',
4545
illegal: '.'
4646
},
47-
{ begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\((?:.|\n)*?\)\1"/ }
47+
{
48+
begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/,
49+
end: /\)([^()\\ ]{0,16})"/,
50+
'after:begin': (m, resp) => { resp.data.heredoc = m[1]; },
51+
'before:end': function(m, resp) { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); }
52+
}
4853
]
4954
};
5055

src/lib/mode_compiler.js

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ export function compileLanguage(language) {
6161
// eslint-disable-next-line no-undefined
6262
const i = match.findIndex((el, i) => i > 0 && el !== undefined);
6363
const matchData = this.matchIndexes[i];
64+
match.splice(0, i); // trim off the extra matches
6465

6566
return Object.assign(match, matchData);
6667
}
@@ -158,11 +159,11 @@ export function compileLanguage(language) {
158159
}
159160

160161
// TODO: We need negative look-behind support to do this properly
161-
function skipIfhasPrecedingOrTrailingDot(match) {
162+
function skipIfhasPrecedingOrTrailingDot(match, resp) {
162163
const before = match.input[match.index - 1];
163164
const after = match.input[match.index + match[0].length];
164165
if (before === "." || after === ".") {
165-
return { ignoreMatch: true };
166+
resp.ignoreMatch();
166167
}
167168
}
168169

@@ -200,8 +201,8 @@ export function compileLanguage(language) {
200201
if (mode.compiled) return;
201202
mode.compiled = true;
202203

203-
// __onBegin is considered private API, internal use only
204-
mode.__onBegin = null;
204+
// __beforeBegin is considered private API, internal use only
205+
mode.__beforeBegin = null;
205206

206207
mode.keywords = mode.keywords || mode.beginKeywords;
207208
if (mode.keywords) {
@@ -218,7 +219,7 @@ export function compileLanguage(language) {
218219
// doesn't allow spaces in keywords anyways and we still check for the boundary
219220
// first
220221
mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + ')(?=\\b|\\s)';
221-
mode.__onBegin = skipIfhasPrecedingOrTrailingDot;
222+
mode.__beforeBegin = skipIfhasPrecedingOrTrailingDot;
222223
}
223224
if (!mode.begin)
224225
mode.begin = /\B|\b/;

src/lib/response.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
export default class Response {
2+
constructor(mode) {
3+
if (mode.data === undefined)
4+
mode.data = {};
5+
this.data = mode.data;
6+
}
7+
8+
ignoreMatch() {
9+
this.ignore = true;
10+
}
11+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<span class="hljs-comment">/*
2+
Truncated block comment
3+
</span>
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/*
2+
Truncated block comment
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<span class="hljs-string">R"foo(
2+
Truncated raw string
3+
)nope"
4+
Still not completed.
5+
</span>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
R"foo(
2+
Truncated raw string
3+
)nope"
4+
Still not completed.

0 commit comments

Comments
 (0)