From a76244fcef9e1ae83b6a7631f33c409c156ad3d9 Mon Sep 17 00:00:00 2001 From: Lee Jeffery Date: Fri, 8 May 2015 20:33:58 +0100 Subject: [PATCH 1/5] Fix CRLF line-ending parsing for comments. --- src/libsyntax/parse/lexer/mod.rs | 50 +++++++++++++++++--------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 6b0674c9a41b4..1a772783c9d3e 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -403,45 +403,47 @@ impl<'a> StringReader<'a> { Some('/') => { self.bump(); self.bump(); + // line comments starting with "///" or "//!" are doc-comments - if self.curr_is('/') || self.curr_is('!') { - let start_bpos = self.pos - BytePos(3); - while !self.is_eof() { - match self.curr.unwrap() { - '\n' => break, - '\r' => { - if self.nextch_is('\n') { - // CRLF - break - } else { - self.err_span_(self.last_pos, self.pos, - "bare CR not allowed in doc-comment"); - } + let doc_comment = self.curr_is('/') || self.curr_is('!'); + let start_bpos = self.pos - BytePos(3); + + while !self.is_eof() { + match self.curr.unwrap() { + '\n' => break, + '\r' => { + if self.nextch_is('\n') { + // CRLF + break + } else { + self.err_span_(self.last_pos, self.pos, + "bare CR not allowed in comment"); } - _ => () } - self.bump(); + _ => () } - return self.with_str_from(start_bpos, |string| { - // but comments with only more "/"s are not + self.bump(); + } + + return if doc_comment { + self.with_str_from(start_bpos, |string| { + // comments with only more "/"s are not doc comments let tok = if is_doc_comment(string) { token::DocComment(token::intern(string)) } else { token::Comment }; - return Some(TokenAndSpan{ + Some(TokenAndSpan { tok: tok, sp: codemap::mk_sp(start_bpos, self.last_pos) - }); - }); + }) + }) } else { - let start_bpos = self.last_pos - BytePos(2); - while !self.curr_is('\n') && !self.is_eof() { self.bump(); } - return Some(TokenAndSpan { + Some(TokenAndSpan { tok: token::Comment, sp: codemap::mk_sp(start_bpos, self.last_pos) - }); + }) } } Some('*') => { From aef0581513f3fd01068cb423d9c62cb48b3f077a Mon Sep 17 00:00:00 2001 From: Lee Jeffery Date: Wed, 13 May 2015 22:05:01 +0100 Subject: [PATCH 2/5] Fix byte offset and error message inconsistencies --- src/libsyntax/parse/lexer/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 1a772783c9d3e..81b283a622ea0 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -406,7 +406,7 @@ impl<'a> StringReader<'a> { // line comments starting with "///" or "//!" are doc-comments let doc_comment = self.curr_is('/') || self.curr_is('!'); - let start_bpos = self.pos - BytePos(3); + let start_bpos = self.last_pos - BytePos(2); while !self.is_eof() { match self.curr.unwrap() { @@ -415,9 +415,9 @@ impl<'a> StringReader<'a> { if self.nextch_is('\n') { // CRLF break - } else { + } else if doc_comment { self.err_span_(self.last_pos, self.pos, - "bare CR not allowed in comment"); + "bare CR not allowed in doc-comment"); } } _ => () From 4f82c3151b063a4226f33df047d4e748ef5e3f13 Mon Sep 17 00:00:00 2001 From: Lee Jeffery Date: Wed, 13 May 2015 22:06:26 +0100 Subject: [PATCH 3/5] Added test to check that newlines are stripped from comments --- src/libsyntax/parse/lexer/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 81b283a622ea0..184f503bbc238 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1565,4 +1565,13 @@ mod tests { assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None)); } + #[test] fn crlf_comments() { + let sh = mk_sh(); + let mut lexer = setup(&sh, "// test\r\n/// test\r\n".to_string()); + let comment = lexer.next_token(); + assert_eq!(comment.tok, token::Comment); + assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7))); + assert_eq!(lexer.next_token().tok, token::Whitespace); + assert_eq!(lexer.next_token().tok, token::DocComment(token::intern("/// test"))); + } } From 93af5f9b446adfeb95abdd8df76cf961451f9d91 Mon Sep 17 00:00:00 2001 From: Lee Jeffery Date: Thu, 14 May 2015 18:19:51 +0100 Subject: [PATCH 4/5] Make BytePos calculation same as original --- src/libsyntax/parse/lexer/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 184f503bbc238..1ca4451d28d09 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -406,7 +406,6 @@ impl<'a> StringReader<'a> { // line comments starting with "///" or "//!" are doc-comments let doc_comment = self.curr_is('/') || self.curr_is('!'); - let start_bpos = self.last_pos - BytePos(2); while !self.is_eof() { match self.curr.unwrap() { @@ -426,6 +425,7 @@ impl<'a> StringReader<'a> { } return if doc_comment { + let start_bpos = self.pos - BytePos(3); self.with_str_from(start_bpos, |string| { // comments with only more "/"s are not doc comments let tok = if is_doc_comment(string) { @@ -440,6 +440,7 @@ impl<'a> StringReader<'a> { }) }) } else { + let start_bpos = self.last_pos - BytePos(2); Some(TokenAndSpan { tok: token::Comment, sp: codemap::mk_sp(start_bpos, self.last_pos) From 2dcc200be0f17ec0cab568736d9d3ccbae453a65 Mon Sep 17 00:00:00 2001 From: Lee Jeffery Date: Thu, 14 May 2015 18:28:28 +0100 Subject: [PATCH 5/5] Fix stupid mistake from previous commit --- src/libsyntax/parse/lexer/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 1ca4451d28d09..b70594a96a028 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -406,6 +406,11 @@ impl<'a> StringReader<'a> { // line comments starting with "///" or "//!" are doc-comments let doc_comment = self.curr_is('/') || self.curr_is('!'); + let start_bpos = if doc_comment { + self.pos - BytePos(3) + } else { + self.last_pos - BytePos(2) + }; while !self.is_eof() { match self.curr.unwrap() { @@ -425,7 +430,6 @@ impl<'a> StringReader<'a> { } return if doc_comment { - let start_bpos = self.pos - BytePos(3); self.with_str_from(start_bpos, |string| { // comments with only more "/"s are not doc comments let tok = if is_doc_comment(string) { @@ -440,7 +444,6 @@ impl<'a> StringReader<'a> { }) }) } else { - let start_bpos = self.last_pos - BytePos(2); Some(TokenAndSpan { tok: token::Comment, sp: codemap::mk_sp(start_bpos, self.last_pos)