Properly handle CR LF in LineScanner. R=rnystrom@google.com Review URL: https://codereview.chromium.org//1327713003 .
diff --git a/CHANGELOG.md b/CHANGELOG.md index ce1e012..ddfd540 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md
@@ -1,3 +1,8 @@ +## 0.1.3+2 + +* Fix `LineScanner`'s handling of carriage returns to match that of + `SpanScanner`. + ## 0.1.3+1 * Fixed the homepage URL.
diff --git a/lib/src/line_scanner.dart b/lib/src/line_scanner.dart index 54ecf7b..6a2880b 100644 --- a/lib/src/line_scanner.dart +++ b/lib/src/line_scanner.dart
@@ -4,8 +4,13 @@ library string_scanner.line_scanner; +import 'package:charcode/ascii.dart'; + import 'string_scanner.dart'; +/// A regular expression matching newlines across platforms. +final _newlineRegExp = new RegExp(r"\r\n?|\n"); + /// A subclass of [StringScanner] that tracks line and column information. class LineScanner extends StringScanner { /// The scanner's current (zero-based) line number. @@ -24,6 +29,10 @@ LineScannerState get state => new LineScannerState._(this, position, line, column); + /// Whether the current position is between a CR character and an LF + /// charactet. + bool get _betweenCRLF => peekChar(-1) == $cr && peekChar() == $lf; + set state(LineScannerState state) { if (!identical(state._scanner, this)) { throw new ArgumentError("The given LineScannerState was not returned by " @@ -40,8 +49,7 @@ super.position = newPosition; if (newPosition > oldPosition) { - var newlines = - "\n".allMatches(string.substring(oldPosition, newPosition)).toList(); + var newlines = _newlinesIn(string.substring(oldPosition, newPosition)); _line += newlines.length; if (newlines.isEmpty) { _column += newPosition - oldPosition; @@ -49,13 +57,15 @@ _column = newPosition - newlines.last.end; } } else { - var newlines = - "\n".allMatches(string.substring(newPosition, oldPosition)).toList(); + var newlines = _newlinesIn(string.substring(newPosition, oldPosition)); + if (_betweenCRLF) newlines.removeLast(); + _line -= newlines.length; if (newlines.isEmpty) { _column -= oldPosition - newPosition; } else { - _column = newPosition - string.lastIndexOf("\n", newPosition) - 1; + _column = newPosition - + string.lastIndexOf(_newlineRegExp, newPosition) - 1; } } } @@ -65,7 +75,7 @@ int readChar() { var char = super.readChar(); - if (char == 0xA) { + if (char == $lf || (char == $cr && peekChar() != $lf)) { _line += 1; _column = 0; } else { @@ -77,7 +87,7 @@ bool scan(Pattern pattern) { if (!super.scan(pattern)) return false; - var newlines = "\n".allMatches(lastMatch[0]).toList(); + var newlines = _newlinesIn(lastMatch[0]); _line += newlines.length; if (newlines.isEmpty) { _column += lastMatch[0].length; @@ -87,6 +97,14 @@ return true; } + + /// Returns a list of [Match]es describing all the newlines in [text], which + /// is assumed to end at [position]. + List<Match> _newlinesIn(String text) { + var newlines = _newlineRegExp.allMatches(text).toList(); + if (_betweenCRLF) newlines.removeLast(); + return newlines; + } } /// A class representing the state of a [LineScanner].
diff --git a/pubspec.yaml b/pubspec.yaml index 16477be..efc7b0a 100644 --- a/pubspec.yaml +++ b/pubspec.yaml
@@ -1,10 +1,11 @@ name: string_scanner -version: 0.1.4-dev +version: 0.1.3+2 author: "Dart Team <misc@dartlang.org>" homepage: https://github.com/dart-lang/string_scanner description: > A class for parsing strings using a sequence of patterns. dependencies: + charcode: "^1.1.0" path: ">=1.2.0 <2.0.0" source_span: ">=1.0.0 <2.0.0" dev_dependencies:
diff --git a/test/line_scanner_test.dart b/test/line_scanner_test.dart index 83222d6..68e5c38 100644 --- a/test/line_scanner_test.dart +++ b/test/line_scanner_test.dart
@@ -10,7 +10,7 @@ void main() { var scanner; setUp(() { - scanner = new LineScanner('foo\nbar\nbaz'); + scanner = new LineScanner('foo\nbar\r\nbaz'); }); test('begins with line and column 0', () { @@ -33,7 +33,17 @@ test("consuming multiple newlines resets the column and increases the line", () { - scanner.expect('foo\nbar\nb'); + scanner.expect('foo\nbar\r\nb'); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(1)); + }); + + test("consuming halfway through a CR LF doesn't count as a line", () { + scanner.expect('foo\nbar\r'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + + scanner.expect('\nb'); expect(scanner.line, equals(2)); expect(scanner.column, equals(1)); }); @@ -56,11 +66,25 @@ expect(scanner.line, equals(1)); expect(scanner.column, equals(0)); }); + + test("consuming halfway through a CR LF doesn't count as a line", () { + scanner.expect('foo\nbar'); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(3)); + + scanner.readChar(); + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + + scanner.readChar(); + expect(scanner.line, equals(2)); + expect(scanner.column, equals(0)); + }); }); group("position=", () { test("forward through newlines sets the line and column", () { - scanner.position = 9; // "foo\nbar\nb" + scanner.position = 10; // "foo\nbar\r\nb" expect(scanner.line, equals(2)); expect(scanner.column, equals(1)); }); @@ -72,18 +96,24 @@ }); test("backward through newlines sets the line and column", () { - scanner.scan("foo\nbar\nbaz"); + scanner.scan("foo\nbar\r\nbaz"); scanner.position = 2; // "fo" expect(scanner.line, equals(0)); expect(scanner.column, equals(2)); }); test("backward through no newlines sets the column", () { - scanner.scan("foo\nbar\nbaz"); - scanner.position = 9; // "foo\nbar\nb" + scanner.scan("foo\nbar\r\nbaz"); + scanner.position = 10; // "foo\nbar\r\nb" expect(scanner.line, equals(2)); expect(scanner.column, equals(1)); }); + + test("forward halfway through a CR LF doesn't count as a line", () { + scanner.position = 8; // "foo\nbar\r" + expect(scanner.line, equals(1)); + expect(scanner.column, equals(4)); + }); }); test("state= restores the line, column, and position", () { @@ -92,7 +122,7 @@ scanner.scan('ar\nba'); scanner.state = state; - expect(scanner.rest, equals('ar\nbaz')); + expect(scanner.rest, equals('ar\r\nbaz')); expect(scanner.line, equals(1)); expect(scanner.column, equals(1)); });