Skip to content

Commit a13e5e9

Browse files
committed
Auto merge of #137027 - hkBst:single_pass_skip_ws, r=<try>
Make skip_whitespace do a single pass. Offshoot from #136931; I'd like to see its performance impact. r? `@nnethercote`
2 parents bdc97d1 + d15cd2e commit a13e5e9

File tree

1 file changed

+29
-16
lines changed

1 file changed

+29
-16
lines changed

compiler/rustc_lexer/src/unescape.rs

+29-16
Original file line numberDiff line numberDiff line change
@@ -356,8 +356,10 @@ where
356356
let start = src.len() - chars.as_str().len() - c.len_utf8();
357357
let res = match c {
358358
'\\' => {
359-
match chars.clone().next() {
359+
let mut chars_clone = chars.clone();
360+
match chars_clone.next() {
360361
Some('\n') => {
362+
chars = chars_clone;
361363
// Rust language specification requires us to skip whitespaces
362364
// if unescaped '\' character is followed by '\n'.
363365
// For details see [Rust language reference]
@@ -379,30 +381,41 @@ where
379381
}
380382
}
381383

384+
/// Skip ASCII whitespace, except for the formfeed character
385+
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
386+
/// Warns if a further newline is skipped, or if the first unskipped character is whitespace (e.g. formfeed or non-ASCII whitespace).
382387
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
383388
where
384389
F: FnMut(Range<usize>, EscapeError),
385390
{
386-
let tail = chars.as_str();
387-
let first_non_space = tail
388-
.bytes()
389-
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
390-
.unwrap_or(tail.len());
391-
if tail[1..first_non_space].contains('\n') {
392-
// The +1 accounts for the escaping slash.
393-
let end = start + first_non_space + 1;
391+
// the escaping slash and newline characters add 2 bytes
392+
let mut end = start + 2;
393+
let mut contains_nl = false;
394+
395+
// manual next_if loop
396+
let mut next_char;
397+
loop {
398+
let mut chars_clone = chars.clone();
399+
next_char = chars_clone.next();
400+
match next_char {
401+
Some(c) if c.is_ascii_whitespace() && c != '\x0c' => {
402+
*chars = chars_clone;
403+
end += 1;
404+
contains_nl |= c == '\n';
405+
}
406+
_ => break,
407+
}
408+
}
409+
410+
if contains_nl {
394411
callback(start..end, EscapeError::MultipleSkippedLinesWarning);
395412
}
396-
let tail = &tail[first_non_space..];
397-
if let Some(c) = tail.chars().next() {
413+
if let Some(c) = next_char {
398414
if c.is_whitespace() {
399-
// For error reporting, we would like the span to contain the character that was not
400-
// skipped. The +1 is necessary to account for the leading \ that started the escape.
401-
let end = start + first_non_space + c.len_utf8() + 1;
402-
callback(start..end, EscapeError::UnskippedWhitespaceWarning);
415+
// for error reporting, include the character that was not skipped in the span
416+
callback(start..end + c.len_utf8(), EscapeError::UnskippedWhitespaceWarning);
403417
}
404418
}
405-
*chars = tail.chars();
406419
}
407420

408421
/// Takes a contents of a string literal (without quotes) and produces a

0 commit comments

Comments
 (0)