diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2017-03-01 23:01:09 +0100 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2017-03-02 17:45:50 +0100 |
| commit | 24b39c51af8b7320fd825a66a239a497f20b0ece (patch) | |
| tree | 2d085a13de1541bb72d186096e7dc695bc21bda5 | |
| parent | 031f9b15df3df5da19b64a1f824463053898d021 (diff) | |
| download | rust-24b39c51af8b7320fd825a66a239a497f20b0ece.tar.gz rust-24b39c51af8b7320fd825a66a239a497f20b0ece.zip | |
Remove std_unicode::str::is_utf16
It was only accessible through the `#[unstable]` crate std_unicode.
It has never been used in the compiler or standard library
since 47e7a05a28c9662159af2d2e0f2b7efc13fa09cb added it in 2012
“for OS API interop”.
It can be replaced with a one-liner:
```rust
/// Returns `true` if `slice` is well-formed UTF-16.
///
/// Each code unit must be either a non-surrogate or part of a lead/trail
/// surrogate pair; an unpaired surrogate makes the whole slice invalid.
/// An empty slice is considered valid.
fn is_utf16(slice: &[u16]) -> bool {
    // `decode_utf16` yields `Err` for every unpaired surrogate, so the input
    // is valid exactly when every decoded item is `Ok`.
    std::char::decode_utf16(slice.iter().cloned()).all(|r| r.is_ok())
}
```
| -rw-r--r-- | src/libcollectionstest/str.rs | 65 | ||||
| -rw-r--r-- | src/libcollectionstest/string.rs | 2 | ||||
| -rw-r--r-- | src/libstd_unicode/lib.rs | 1 | ||||
| -rw-r--r-- | src/libstd_unicode/u_str.rs | 22 |
4 files changed, 1 insertions, 89 deletions
diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 6221888f5e5..8071c7e8c20 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -541,71 +541,6 @@ fn from_utf8_mostly_ascii() { } #[test] -fn test_is_utf16() { - use std_unicode::str::is_utf16; - - macro_rules! pos { - ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } } - } - - // non-surrogates - pos!(&[0x0000], - &[0x0001, 0x0002], - &[0xD7FF], - &[0xE000]); - - // surrogate pairs (randomly generated with Python 3's - // .encode('utf-16be')) - pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45], - &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14], - &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]); - - // mixtures (also random) - pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65], - &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006], - &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]); - - // negative tests - macro_rules! neg { - ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } } - } - - neg!( - // surrogate + regular unit - &[0xdb45, 0x0000], - // surrogate + lead surrogate - &[0xd900, 0xd900], - // unterminated surrogate - &[0xd8ff], - // trail surrogate without a lead - &[0xddb7]); - - // random byte sequences that Python 3's .decode('utf-16be') - // failed on - neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7], - &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3], - &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca], - &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278], - &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e], - &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5], - &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee], - &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7], - &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a], - &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a], - &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe], - &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf], - &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e], - &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5], - &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f], - &[0xb8fe, 
0xebbe, 0xda32, 0x1a5f, 0x8b8b], - &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7], - &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9], - &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8], - &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282], - &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]); -} - -#[test] fn test_as_bytes() { // no null let v = [ diff --git a/src/libcollectionstest/string.rs b/src/libcollectionstest/string.rs index f77dd510303..2f021b9935d 100644 --- a/src/libcollectionstest/string.rs +++ b/src/libcollectionstest/string.rs @@ -129,7 +129,7 @@ fn test_from_utf16() { let s_as_utf16 = s.encode_utf16().collect::<Vec<u16>>(); let u_as_string = String::from_utf16(&u).unwrap(); - assert!(::std_unicode::str::is_utf16(&u)); + assert!(::std_unicode::char::decode_utf16(u.iter().cloned()).all(|r| r.is_ok())); assert_eq!(s_as_utf16, u); assert_eq!(u_as_string, s); diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs index 1adf00e40f1..7e5ab1a54ab 100644 --- a/src/libstd_unicode/lib.rs +++ b/src/libstd_unicode/lib.rs @@ -47,7 +47,6 @@ pub mod char; #[allow(deprecated)] pub mod str { pub use u_str::{SplitWhitespace, UnicodeStr}; - pub use u_str::is_utf16; pub use u_str::Utf16Encoder; } diff --git a/src/libstd_unicode/u_str.rs b/src/libstd_unicode/u_str.rs index 0ca6db9b0de..3c02ea82d2a 100644 --- a/src/libstd_unicode/u_str.rs +++ b/src/libstd_unicode/u_str.rs @@ -77,28 +77,6 @@ impl UnicodeStr for str { } } -/// Determines if a vector of `u16` contains valid UTF-16 -pub fn is_utf16(v: &[u16]) -> bool { - let mut it = v.iter(); - macro_rules! next { ($ret:expr) => { - match it.next() { Some(u) => *u, None => return $ret } - } - } - loop { - let u = next!(true); - - match char::from_u32(u as u32) { - Some(_) => {} - None => { - let u2 = next!(false); - if u < 0xD7FF || u > 0xDBFF || u2 < 0xDC00 || u2 > 0xDFFF { - return false; - } - } - } - } -} - /// Iterator adaptor for encoding `char`s to UTF-16. #[derive(Clone)] pub struct Utf16Encoder<I> { |
