Skip to content

std: Account for CRLF in {str, BufRead}::lines #28034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 4, 2015
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
@@ -604,14 +604,14 @@ impl str {
UnicodeStr::split_whitespace(self)
}

/// An iterator over the lines of a string, separated by `\n`.
/// An iterator over the lines of a string, separated by `\n` or `\r\n`.
///
/// This does not include the empty string after a trailing `\n`.
/// This does not include the empty string after a trailing newline or CRLF.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should clarify (as the `BufRead::lines` documentation does) that the newlines are stripped from the strings that are yielded.

///
/// # Examples
///
/// ```
/// let four_lines = "foo\nbar\n\nbaz";
/// let four_lines = "foo\nbar\n\r\nbaz";
/// let v: Vec<&str> = four_lines.lines().collect();
///
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -620,7 +620,7 @@ impl str {
/// Leaving off the trailing character:
///
/// ```
/// let four_lines = "foo\nbar\n\nbaz\n";
/// let four_lines = "foo\r\nbar\n\nbaz\n";
/// let v: Vec<&str> = four_lines.lines().collect();
///
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
@@ -654,7 +654,9 @@ impl str {
/// assert_eq!(v, ["foo", "bar", "", "baz"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[deprecated(since = "1.4.0", reason = "use lines() instead now")]
#[inline]
#[allow(deprecated)]
pub fn lines_any(&self) -> LinesAny {
core_str::StrExt::lines_any(self)
}
4 changes: 2 additions & 2 deletions src/libcollectionstest/str.rs
Original file line number Diff line number Diff line change
@@ -964,11 +964,11 @@ fn test_split_whitespace() {

#[test]
fn test_lines() {
let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
let data = "\nMäry häd ä little lämb\n\r\nLittle lämb\n";
let lines: Vec<&str> = data.lines().collect();
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);

let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
let data = "\r\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
let lines: Vec<&str> = data.lines().collect();
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have a test for a trailing `\r\n`?

14 changes: 10 additions & 4 deletions src/libcore/str/mod.rs
Original file line number Diff line number Diff line change
@@ -827,7 +827,7 @@ generate_pattern_iterators! {
/// Created with the method `.lines()`.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone)]
pub struct Lines<'a>(SplitTerminator<'a, char>);
pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
@@ -854,8 +854,10 @@ impl<'a> DoubleEndedIterator for Lines<'a> {

/// Created with the method `.lines_any()`.
#[stable(feature = "rust1", since = "1.0.0")]
#[deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
#[derive(Clone)]
pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>);
#[allow(deprecated)]
pub struct LinesAny<'a>(Lines<'a>);

/// A nameable, clonable fn type
#[derive(Clone)]
@@ -887,6 +889,7 @@ impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
}

#[stable(feature = "rust1", since = "1.0.0")]
#[allow(deprecated)]
impl<'a> Iterator for LinesAny<'a> {
type Item = &'a str;

@@ -902,6 +905,7 @@ impl<'a> Iterator for LinesAny<'a> {
}

#[stable(feature = "rust1", since = "1.0.0")]
#[allow(deprecated)]
impl<'a> DoubleEndedIterator for LinesAny<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
@@ -1289,6 +1293,7 @@ pub trait StrExt {
fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
where P::Searcher: ReverseSearcher<'a>;
fn lines(&self) -> Lines;
#[allow(deprecated)]
fn lines_any(&self) -> LinesAny;
fn char_len(&self) -> usize;
fn slice_chars(&self, begin: usize, end: usize) -> &str;
@@ -1428,12 +1433,13 @@ impl StrExt for str {
}
#[inline]
fn lines(&self) -> Lines {
Lines(self.split_terminator('\n'))
Lines(self.split_terminator('\n').map(LinesAnyMap))
}

#[inline]
#[allow(deprecated)]
fn lines_any(&self) -> LinesAny {
LinesAny(self.lines().map(LinesAnyMap))
LinesAny(self.lines())
}

#[inline]
2 changes: 1 addition & 1 deletion src/librustdoc/passes.rs
Original file line number Diff line number Diff line change
@@ -308,7 +308,7 @@ pub fn collapse_docs(krate: clean::Crate) -> plugins::PluginResult {
}

pub fn unindent(s: &str) -> String {
let lines = s.lines_any().collect::<Vec<&str> >();
let lines = s.lines().collect::<Vec<&str> >();
let mut saw_first_line = false;
let mut saw_second_line = false;
let min_indent = lines.iter().fold(usize::MAX, |min_indent, line| {
11 changes: 7 additions & 4 deletions src/libstd/io/mod.rs
Original file line number Diff line number Diff line change
@@ -1439,7 +1439,7 @@ pub trait BufRead: Read {
///
/// The iterator returned from this function will yield instances of
/// `io::Result<String>`. Each string returned will *not* have a newline
/// byte (the 0xA byte) at the end.
/// byte (the 0xA byte) or CRLF (0xD, 0xA bytes) at the end.
///
/// # Examples
///
@@ -1763,6 +1763,9 @@ impl<B: BufRead> Iterator for Lines<B> {
Ok(_n) => {
if buf.ends_with("\n") {
buf.pop();
if buf.ends_with("\r") {
buf.pop();
}
}
Some(Ok(buf))
}
@@ -1834,12 +1837,12 @@ mod tests {

#[test]
fn lines() {
let buf = Cursor::new(&b"12"[..]);
let buf = Cursor::new(&b"12\r"[..]);
let mut s = buf.lines();
assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
assert_eq!(s.next().unwrap().unwrap(), "12\r".to_string());
assert!(s.next().is_none());

let buf = Cursor::new(&b"12\n\n"[..]);
let buf = Cursor::new(&b"12\r\n\n"[..]);
let mut s = buf.lines();
assert_eq!(s.next().unwrap().unwrap(), "12".to_string());
assert_eq!(s.next().unwrap().unwrap(), "".to_string());
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/comments.rs
Original file line number Diff line number Diff line change
@@ -132,7 +132,7 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {

if comment.starts_with("/*") {
let lines = comment[3..comment.len() - 2]
.lines_any()
.lines()
.map(|s| s.to_string())
.collect::<Vec<String> >();