Skip to content

Commit 38f2aa8

Browse files
hkBst and folkertdev committed
Make skip_whitespace do a single pass
Co-authored-by: Folkert de Vries <[email protected]>
1 parent 019fc4d commit 38f2aa8

File tree

1 file changed

+28
-20
lines changed

1 file changed

+28
-20
lines changed

compiler/rustc_lexer/src/unescape.rs

+28-20
Original file line numberDiff line numberDiff line change
@@ -356,11 +356,10 @@ where
356356
let start = src.len() - chars.as_str().len() - c.len_utf8();
357357
let res = match c {
358358
'\\' => {
359-
match chars.clone().next() {
360-
Some('\n') => {
361-
// Rust language specification requires us to skip whitespaces
362-
// if unescaped '\' character is followed by '\n'.
363-
// For details see [Rust language reference]
359+
match chars.as_str().as_bytes().first() {
360+
Some(b'\n') => {
361+
let _ = chars.next();
362+
// skip whitespace for backslash newline, see [Rust language reference]
364363
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
365364
skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
366365
callback(range, Err(err))
@@ -379,30 +378,39 @@ where
379378
}
380379
}
381380

381+
/// Skip ASCII whitespace, except for the formfeed character
382+
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
383+
/// Warns on unescaped newline and following non-ASCII whitespace.
382384
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
383385
where
384386
F: FnMut(Range<usize>, EscapeError),
385387
{
386-
let tail = chars.as_str();
387-
let first_non_space = tail
388-
.bytes()
389-
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
390-
.unwrap_or(tail.len());
391-
if tail[1..first_non_space].contains('\n') {
392-
// The +1 accounts for the escaping slash.
393-
let end = start + first_non_space + 1;
388+
let mut spaces = 0;
389+
let mut contains_nl = false;
390+
391+
for byte in chars.as_str().bytes() {
392+
// this is 2000x faster than b" \t\n\r".contains(&byte);
393+
let is_space = byte != b' ' && byte != b'\t' && byte != b'\n' && byte != b'\r';
394+
spaces += is_space as usize;
395+
contains_nl |= byte == b'\n';
396+
if !is_space {
397+
break;
398+
}
399+
}
400+
*chars = chars.as_str()[spaces..].chars();
401+
402+
// the escaping slash and newline characters add 2 bytes
403+
let end = start + 2 + spaces;
404+
405+
if contains_nl {
394406
callback(start..end, EscapeError::MultipleSkippedLinesWarning);
395407
}
396-
let tail = &tail[first_non_space..];
397-
if let Some(c) = tail.chars().next() {
408+
if let Some(c) = chars.clone().next() {
398409
if c.is_whitespace() {
399-
// For error reporting, we would like the span to contain the character that was not
400-
// skipped. The +1 is necessary to account for the leading \ that started the escape.
401-
let end = start + first_non_space + c.len_utf8() + 1;
402-
callback(start..end, EscapeError::UnskippedWhitespaceWarning);
410+
// for error reporting, include the character that was not skipped in the span
411+
callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning);
403412
}
404413
}
405-
*chars = tail.chars();
406414
}
407415

408416
/// Takes a contents of a string literal (without quotes) and produces a

0 commit comments

Comments
 (0)