Skip to content

Commit ee62c0e

Browse files
committed
Implement RFC 1054: add split_whitespace() fn, deprecate words()
For now, words() is left in (but deprecated), and Words is a type alias for struct SplitWhitespace. Also cleaned up references to s.words() throughout the codebase. Closes #15628.
1 parent b03a4ad commit ee62c0e

File tree

11 files changed

+53
-28
lines changed

11 files changed

+53
-28
lines changed

src/libcollections/str.rs

+23-6
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ pub use core::str::{Matches, RMatches};
7676
pub use core::str::{MatchIndices, RMatchIndices};
7777
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
7878
pub use core::str::{from_utf8_unchecked, ParseBoolError};
79-
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
79+
pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
8080
pub use core::str::pattern;
8181

8282
/*
@@ -1737,27 +1737,44 @@ impl str {
17371737
UnicodeStr::grapheme_indices(&self[..], is_extended)
17381738
}
17391739

1740-
/// An iterator over the non-empty words of `self`.
1741-
///
1742-
/// A 'word' is a subsequence separated by any sequence of whitespace.
1743-
/// Sequences of whitespace
1744-
/// are collapsed, so empty "words" are not included.
1740+
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
1741+
/// and which are separated by any amount of whitespace.
17451742
///
17461743
/// # Examples
17471744
///
17481745
/// ```
17491746
/// # #![feature(str_words)]
1747+
/// # #![allow(deprecated)]
17501748
/// let some_words = " Mary had\ta little \n\t lamb";
17511749
/// let v: Vec<&str> = some_words.words().collect();
17521750
///
17531751
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
17541752
/// ```
1753+
#[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
1754+
since = "1.1.0")]
17551755
#[unstable(feature = "str_words",
17561756
reason = "the precise algorithm to use is unclear")]
1757+
#[allow(deprecated)]
17571758
pub fn words(&self) -> Words {
17581759
UnicodeStr::words(&self[..])
17591760
}
17601761

1762+
/// An iterator over the non-empty substrings of `self` which contain no whitespace,
1763+
/// and which are separated by any amount of whitespace.
1764+
///
1765+
/// # Examples
1766+
///
1767+
/// ```
1768+
/// let some_words = " Mary had\ta little \n\t lamb";
1769+
/// let v: Vec<&str> = some_words.split_whitespace().collect();
1770+
///
1771+
/// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
1772+
/// ```
1773+
#[stable(feature = "split_whitespace", since = "1.1.0")]
1774+
pub fn split_whitespace(&self) -> SplitWhitespace {
1775+
UnicodeStr::split_whitespace(&self[..])
1776+
}
1777+
17611778
/// Returns a string's displayed width in columns.
17621779
///
17631780
/// Control characters have zero width.

src/libcollectionstest/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#![feature(hash)]
1515
#![feature(rand)]
1616
#![feature(rustc_private)]
17-
#![feature(str_words)]
1817
#![feature(test)]
1918
#![feature(unboxed_closures)]
2019
#![feature(unicode)]

src/libcollectionstest/str.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -939,9 +939,9 @@ fn test_rsplitn() {
939939
}
940940

941941
#[test]
942-
fn test_words() {
942+
fn test_split_whitespace() {
943943
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
944-
let words: Vec<&str> = data.words().collect();
944+
let words: Vec<&str> = data.split_whitespace().collect();
945945
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
946946
}
947947

src/libgetopts/lib.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@
9191

9292
#![deny(missing_docs)]
9393
#![feature(staged_api)]
94-
#![feature(str_words)]
9594
#![feature(str_char)]
9695
#![cfg_attr(test, feature(rustc_private))]
9796

@@ -771,7 +770,7 @@ pub fn usage(brief: &str, opts: &[OptGroup]) -> String {
771770

772771
// Normalize desc to contain words separated by one space character
773772
let mut desc_normalized_whitespace = String::new();
774-
for word in desc.words() {
773+
for word in desc.split_whitespace() {
775774
desc_normalized_whitespace.push_str(word);
776775
desc_normalized_whitespace.push(' ');
777776
}

src/librustc/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
#![feature(staged_api)]
3939
#![feature(std_misc)]
4040
#![feature(path_ext)]
41-
#![feature(str_words)]
4241
#![feature(str_char)]
4342
#![feature(into_cow)]
4443
#![feature(slice_patterns)]

src/librustc/session/config.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ macro_rules! options {
418418
-> bool {
419419
match v {
420420
Some(s) => {
421-
for s in s.words() {
421+
for s in s.split_whitespace() {
422422
slot.push(s.to_string());
423423
}
424424
true
@@ -431,7 +431,7 @@ macro_rules! options {
431431
-> bool {
432432
match v {
433433
Some(s) => {
434-
let v = s.words().map(|s| s.to_string()).collect();
434+
let v = s.split_whitespace().map(|s| s.to_string()).collect();
435435
*slot = Some(v);
436436
true
437437
},

src/librustc_unicode/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ mod u_str;
4545
pub mod char;
4646

4747
pub mod str {
48-
pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices};
48+
pub use u_str::{UnicodeStr, SplitWhitespace, Words, Graphemes, GraphemeIndices};
4949
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
5050
pub use u_str::{utf16_items, Utf16Encoder};
5151
}

src/librustc_unicode/u_str.rs

+19-5
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,16 @@ use core::str::Split;
2525

2626
use tables::grapheme::GraphemeCat;
2727

28-
/// An iterator over the words of a string, separated by a sequence of whitespace
28+
#[deprecated(reason = "struct Words is being replaced by struct SplitWhitespace",
29+
since = "1.1.0")]
2930
#[unstable(feature = "str_words",
3031
reason = "words() will be replaced by split_whitespace() in 1.1.0")]
31-
pub struct Words<'a> {
32+
pub type Words<'a> = SplitWhitespace<'a>;
33+
34+
/// An iterator over the non-whitespace substrings of a string,
35+
/// separated by any amount of whitespace.
36+
#[stable(feature = "split_whitespace", since = "1.1.0")]
37+
pub struct SplitWhitespace<'a> {
3238
inner: Filter<Split<'a, fn(char) -> bool>, fn(&&str) -> bool>,
3339
}
3440

@@ -37,7 +43,9 @@ pub struct Words<'a> {
3743
pub trait UnicodeStr {
3844
fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>;
3945
fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>;
46+
#[allow(deprecated)]
4047
fn words<'a>(&'a self) -> Words<'a>;
48+
fn split_whitespace<'a>(&'a self) -> SplitWhitespace<'a>;
4149
fn is_whitespace(&self) -> bool;
4250
fn is_alphanumeric(&self) -> bool;
4351
fn width(&self, is_cjk: bool) -> usize;
@@ -57,15 +65,21 @@ impl UnicodeStr for str {
5765
GraphemeIndices { start_offset: self.as_ptr() as usize, iter: self.graphemes(is_extended) }
5866
}
5967

68+
#[allow(deprecated)]
6069
#[inline]
6170
fn words(&self) -> Words {
71+
self.split_whitespace()
72+
}
73+
74+
#[inline]
75+
fn split_whitespace(&self) -> SplitWhitespace {
6276
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
6377
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
6478

6579
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
6680
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
6781

68-
Words { inner: self.split(is_whitespace).filter(is_not_empty) }
82+
SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
6983
}
7084

7185
#[inline]
@@ -546,11 +560,11 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
546560
}
547561
}
548562

549-
impl<'a> Iterator for Words<'a> {
563+
impl<'a> Iterator for SplitWhitespace<'a> {
550564
type Item = &'a str;
551565

552566
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
553567
}
554-
impl<'a> DoubleEndedIterator for Words<'a> {
568+
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
555569
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
556570
}

src/librustdoc/html/markdown.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result {
274274
};
275275

276276
// Transform the contents of the header into a hyphenated string
277-
let id = s.words().map(|s| s.to_ascii_lowercase())
277+
let id = s.split_whitespace().map(|s| s.to_ascii_lowercase())
278278
.collect::<Vec<String>>().connect("-");
279279

280280
// This is a terrible hack working around how hoedown gives us rendered

src/librustdoc/lib.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
#![feature(std_misc)]
3232
#![feature(test)]
3333
#![feature(unicode)]
34-
#![feature(str_words)]
3534
#![feature(path_ext)]
3635
#![feature(path_relative_from)]
3736
#![feature(slice_patterns)]
@@ -240,7 +239,7 @@ pub fn main_args(args: &[String]) -> isize {
240239

241240
let test_args = matches.opt_strs("test-args");
242241
let test_args: Vec<String> = test_args.iter()
243-
.flat_map(|s| s.words())
242+
.flat_map(|s| s.split_whitespace())
244243
.map(|s| s.to_string())
245244
.collect();
246245

@@ -404,13 +403,13 @@ fn rust_input(cratefile: &str, externs: core::Externs, matches: &getopts::Matche
404403
}
405404
clean::NameValue(ref x, ref value)
406405
if "passes" == *x => {
407-
for pass in value.words() {
406+
for pass in value.split_whitespace() {
408407
passes.push(pass.to_string());
409408
}
410409
}
411410
clean::NameValue(ref x, ref value)
412411
if "plugins" == *x => {
413-
for p in value.words() {
412+
for p in value.split_whitespace() {
414413
plugins.push(p.to_string());
415414
}
416415
}

src/test/run-pass/drop-with-type-ascription-1.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
// except according to those terms.
1010

1111

12-
#![feature(str_words)]
13-
1412
fn main() {
1513
let foo = "hello".to_string();
16-
let foo: Vec<&str> = foo.words().collect();
14+
let foo: Vec<&str> = foo.split_whitespace().collect();
1715
let invalid_string = &foo[0];
1816
assert_eq!(*invalid_string, "hello");
1917
}

0 commit comments

Comments
 (0)