Skip to content

Commit 031f9b1

Browse files
committed
Only keep one copy of the UTF8_CHAR_WIDTH table.
… instead of one in each of libcore and libstd_unicode. Move the `utf8_char_width` function to `core::str` under the `str_internals` unstable feature.
1 parent 691eba1 commit 031f9b1

File tree

6 files changed

+12
-30
lines changed

6 files changed

+12
-30
lines changed

src/libcollections/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#![feature(slice_patterns)]
5555
#![feature(specialization)]
5656
#![feature(staged_api)]
57+
#![feature(str_internals)]
5758
#![feature(trusted_len)]
5859
#![feature(unicode)]
5960
#![feature(unique)]

src/libcollections/string.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ use core::iter::{FromIterator, FusedIterator};
6262
use core::mem;
6363
use core::ops::{self, Add, AddAssign, Index, IndexMut};
6464
use core::ptr;
65+
use core::str as core_str;
6566
use core::str::pattern::Pattern;
6667
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};
67-
use std_unicode::str as unicode_str;
6868

6969
use borrow::{Cow, ToOwned};
7070
use range::RangeArgument;
@@ -575,7 +575,7 @@ impl String {
575575
if byte < 128 {
576576
// subseqidx handles this
577577
} else {
578-
let w = unicode_str::utf8_char_width(byte);
578+
let w = core_str::utf8_char_width(byte);
579579

580580
match w {
581581
2 => {

src/libcore/str/mod.rs

+7
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
13521352
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
13531353
];
13541354

1355+
/// Given a first byte, determine how many bytes are in this UTF-8 character
1356+
#[unstable(feature = "str_internals", issue = "0")]
1357+
#[inline]
1358+
pub fn utf8_char_width(b: u8) -> usize {
1359+
return UTF8_CHAR_WIDTH[b as usize] as usize;
1360+
}
1361+
13551362
/// Mask of the value bits of a continuation byte
13561363
const CONT_MASK: u8 = 0b0011_1111;
13571364
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte

src/libstd/io/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@
256256
#![stable(feature = "rust1", since = "1.0.0")]
257257

258258
use cmp;
259-
use std_unicode::str as core_str;
259+
use core::str as core_str;
260260
use error as std_error;
261261
use fmt;
262262
use result;

src/libstd_unicode/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub mod char;
4747
#[allow(deprecated)]
4848
pub mod str {
4949
pub use u_str::{SplitWhitespace, UnicodeStr};
50-
pub use u_str::{is_utf16, utf8_char_width};
50+
pub use u_str::is_utf16;
5151
pub use u_str::Utf16Encoder;
5252
}
5353

src/libstd_unicode/u_str.rs

-26
Original file line numberDiff line numberDiff line change
@@ -77,32 +77,6 @@ impl UnicodeStr for str {
7777
}
7878
}
7979

80-
// https://tools.ietf.org/html/rfc3629
81-
static UTF8_CHAR_WIDTH: [u8; 256] = [
82-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
83-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
84-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
85-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
86-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
87-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
88-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
89-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
90-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
91-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
92-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
93-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
94-
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
95-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
96-
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
97-
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
98-
];
99-
100-
/// Given a first byte, determine how many bytes are in this UTF-8 character
101-
#[inline]
102-
pub fn utf8_char_width(b: u8) -> usize {
103-
return UTF8_CHAR_WIDTH[b as usize] as usize;
104-
}
105-
10680
/// Determines if a vector of `u16` contains valid UTF-16
10781
pub fn is_utf16(v: &[u16]) -> bool {
10882
let mut it = v.iter();

0 commit comments

Comments
 (0)