Remove `std_unicode::str::is_utf16`, its re-export, and its unit test.

The assertion in `test_from_utf16` that relied on it now checks validity by
decoding the code units with `char::decode_utf16` and requiring every item to
be `Ok`. `decode_utf16` takes an iterator of `u16` values (not references),
hence `u.iter().cloned()`.

diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
index 6221888f5e55e..8071c7e8c20d5 100644
--- a/src/libcollectionstest/str.rs
+++ b/src/libcollectionstest/str.rs
@@ -540,71 +540,6 @@ fn from_utf8_mostly_ascii() {
     }
 }
 
-#[test]
-fn test_is_utf16() {
-    use std_unicode::str::is_utf16;
-
-    macro_rules! pos {
-        ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
-    }
-
-    // non-surrogates
-    pos!(&[0x0000],
-         &[0x0001, 0x0002],
-         &[0xD7FF],
-         &[0xE000]);
-
-    // surrogate pairs (randomly generated with Python 3's
-    // .encode('utf-16be'))
-    pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
-         &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
-         &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
-
-    // mixtures (also random)
-    pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
-         &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
-         &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
-
-    // negative tests
-    macro_rules! neg {
-        ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
-    }
-
-    neg!(
-        // surrogate + regular unit
-        &[0xdb45, 0x0000],
-        // surrogate + lead surrogate
-        &[0xd900, 0xd900],
-        // unterminated surrogate
-        &[0xd8ff],
-        // trail surrogate without a lead
-        &[0xddb7]);
-
-    // random byte sequences that Python 3's .decode('utf-16be')
-    // failed on
-    neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
-         &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
-         &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
-         &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
-         &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
-         &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
-         &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
-         &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
-         &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
-         &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
-         &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
-         &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
-         &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
-         &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
-         &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
-         &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
-         &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
-         &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
-         &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
-         &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
-         &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
-}
-
 #[test]
 fn test_as_bytes() {
     // no null
diff --git a/src/libcollectionstest/string.rs b/src/libcollectionstest/string.rs
index f77dd510303c7..fcb914f711d94 100644
--- a/src/libcollectionstest/string.rs
+++ b/src/libcollectionstest/string.rs
@@ -129,7 +129,7 @@ fn test_from_utf16() {
         let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>();
         let u_as_string = String::from_utf16(&u).unwrap();
 
-        assert!(::std_unicode::str::is_utf16(&u));
+        assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok()));
         assert_eq!(s_as_utf16, u);
         assert_eq!(u_as_string, s);
 
diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs
index d52d1549b5173..79a8c69088c8c 100644
--- a/src/libstd_unicode/lib.rs
+++ b/src/libstd_unicode/lib.rs
@@ -47,7 +47,7 @@ pub mod char;
 #[allow(deprecated)]
 pub mod str {
     pub use u_str::{SplitWhitespace, UnicodeStr};
-    pub use u_str::{is_utf16, utf8_char_width};
+    pub use u_str::utf8_char_width;
     pub use u_str::Utf16Encoder;
 }
 
diff --git a/src/libstd_unicode/u_str.rs b/src/libstd_unicode/u_str.rs
index 1c7894794c9c8..40c49eb213f91 100644
--- a/src/libstd_unicode/u_str.rs
+++ b/src/libstd_unicode/u_str.rs
@@ -103,28 +103,6 @@ pub fn utf8_char_width(b: u8) -> usize {
     return UTF8_CHAR_WIDTH[b as usize] as usize;
 }
 
-/// Determines if a vector of `u16` contains valid UTF-16
-pub fn is_utf16(v: &[u16]) -> bool {
-    let mut it = v.iter();
-    macro_rules! next { ($ret:expr) => {
-            match it.next() { Some(u) => *u, None => return $ret }
-        }
-    }
-    loop {
-        let u = next!(true);
-
-        match char::from_u32(u as u32) {
-            Some(_) => {}
-            None => {
-                let u2 = next!(false);
-                if u < 0xD7FF || u > 0xDBFF || u2 < 0xDC00 || u2 > 0xDFFF {
-                    return false;
-                }
-            }
-        }
-    }
-}
-
 /// Iterator adaptor for encoding `char`s to UTF-16.
 #[derive(Clone)]
 pub struct Utf16Encoder {