Skip to content
This repository was archived by the owner on Feb 10, 2025. It is now read-only.

Don't allow CR to act as newline when it is followed by LF #81

Merged
merged 5 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## 1.4.0

* Fix `LineScanner`'s handling of `\r\n`'s to preventing errors scanning
zero-length matches when between CR and LF. CR is treated as a new line only
if not immediately followed by a LF.
* Fix `LineScanner`'s updating of `column` when setting `position` if the
current position is not `0`.

## 1.3.0

* Require Dart 3.1.0
Expand Down
70 changes: 56 additions & 14 deletions lib/src/line_scanner.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import 'utils.dart';

// Note that much of this code is duplicated in eager_span_scanner.dart.

/// A regular expression matching newlines across platforms.
final _newlineRegExp = RegExp(r'\r\n?|\n');
/// A regular expression matching newlines. A newline is either a `\n`, a `\r\n`
/// or a `\r` that is not immediately followed by a `\n`.
final _newlineRegExp = RegExp(r'\n|\r\n|\r(?!\n)');

/// A subclass of [StringScanner] that tracks line and column information.
class LineScanner extends StringScanner {
Expand Down Expand Up @@ -48,27 +49,57 @@ class LineScanner extends StringScanner {

@override
set position(int newPosition) {
if (newPosition == position) {
return;
}

final oldPosition = position;
super.position = newPosition;

if (newPosition > oldPosition) {
final newlines = _newlinesIn(string.substring(oldPosition, newPosition));
if (newPosition == 0) {
_line = 0;
_column = 0;
} else if (newPosition > oldPosition) {
final newlines = _newlinesIn(string.substring(oldPosition, newPosition),
endPosition: newPosition);
_line += newlines.length;
if (newlines.isEmpty) {
_column += newPosition - oldPosition;
} else {
_column = newPosition - newlines.last.end;
// The regex got a substring, so we need to account for where it started
// in the string.
final offsetOfLastNewline = oldPosition + newlines.last.end;
_column = newPosition - offsetOfLastNewline;
}
} else {
final newlines = _newlinesIn(string.substring(newPosition, oldPosition));
if (_betweenCRLF) newlines.removeLast();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line has likely been a bug since this landed. It was introduced during code review.

} else if (newPosition < oldPosition) {
final newlines = _newlinesIn(string.substring(newPosition, oldPosition),
endPosition: oldPosition);

_line -= newlines.length;
if (newlines.isEmpty) {
_column -= oldPosition - newPosition;
} else {
_column =
newPosition - string.lastIndexOf(_newlineRegExp, newPosition) - 1;
// To compute the new column, we need to locate the last newline before
// the new position. When searching, we must exclude the CR if we're
// between a CRLF because it's not considered a newline.
final crOffset = _betweenCRLF ? -1 : 0;
// Additionally, if we use newPosition as the end of the search and the
// character at that position itself (the next character) is a newline
// we should not use it, so also offset to account for that.
const currentCharOffset = -1;
final lastNewline = string.lastIndexOf(
_newlineRegExp, newPosition + currentCharOffset + crOffset);

// Now we need to know the offset after the newline. This is the index
// above plus the length of the newline (eg. if we found `\r\n`) we need
// to add two. However if no newline was found, that index is 0.
final offsetAfterLastNewline = lastNewline == -1
? 0
: string[lastNewline] == '\r' && string[lastNewline + 1] == '\n'
? lastNewline + 2
: lastNewline + 1;

_column = newPosition - offsetAfterLastNewline;
}
}
}
Expand Down Expand Up @@ -103,7 +134,7 @@ class LineScanner extends StringScanner {
bool scan(Pattern pattern) {
if (!super.scan(pattern)) return false;

final newlines = _newlinesIn(lastMatch![0]!);
final newlines = _newlinesIn(lastMatch![0]!, endPosition: position);
_line += newlines.length;
if (newlines.isEmpty) {
_column += lastMatch![0]!.length;
Expand All @@ -115,10 +146,21 @@ class LineScanner extends StringScanner {
}

/// Returns a list of [Match]es describing all the newlines in [text], which
/// is assumed to end at [position].
List<Match> _newlinesIn(String text) {
/// ends at [endPosition].
///
/// If [text] ends with `\r`, it will only be treated as a newline if the next
/// character at [position] is not a `\n`.
List<Match> _newlinesIn(String text, {required int endPosition}) {
final newlines = _newlineRegExp.allMatches(text).toList();
if (_betweenCRLF) newlines.removeLast();
// If the last character is a `\r` it will have been treated as a newline,
// but this is only valid if the next character is not a `\n`.
if (endPosition < string.length &&
text.endsWith('\r') &&
string[endPosition] == '\n') {
// newlines should never be empty here, because if `text` ends with `\r`
// it would have matched `\r(?!\n)` in the newline regex.
newlines.removeLast();
}
return newlines;
}
}
Expand Down
2 changes: 1 addition & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: string_scanner
version: 1.3.0
version: 1.4.0
description: A class for parsing strings using a sequence of patterns.
repository: https://github.com/dart-lang/string_scanner

Expand Down
Loading