diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2015-08-13 18:39:46 +0200 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2015-08-23 00:28:56 +0200 |
| commit | 6174b8d726ed5764694e5404329d8b5e66517ed5 (patch) | |
| tree | 47dd9d787f7550a5d47301652fc5081446f6495e /src/libstd | |
| parent | c408b7863389aa2bdb253ffa363e693bcd02439f (diff) | |
| download | rust-6174b8d726ed5764694e5404329d8b5e66517ed5.tar.gz rust-6174b8d726ed5764694e5404329d8b5e66517ed5.zip | |
Refactor low-level UTF-16 decoding.
* Rename `utf16_items` to `decode_utf16`. "Items" is meaningless.
* Move it to `rustc_unicode::char`, exposed in `std::char`.
* Generalize it to any `u16` iterable, not just `&[u16]`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enables using the `FromIterator for Result` impl.
* Add a `REPLACEMENT_CHARACTER` constant.
* Document how `result.unwrap_or(REPLACEMENT_CHARACTER)` replaces `Utf16Item::to_char_lossy`.
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/lib.rs | 1 | ||||
| -rw-r--r-- | src/libstd/sys/common/wtf8.rs | 11 |
2 files changed, 6 insertions, 6 deletions
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index 179f0727d46..fca4c66112e 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -242,6 +242,7 @@ #![feature(unicode)] #![feature(unique)] #![feature(unsafe_no_drop_flag, filling_drop)] +#![feature(decode_utf16)] #![feature(vec_push_all)] #![feature(vec_resize)] #![feature(wrapping)] diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs index 9e4a80a411b..eb313d275a1 100644 --- a/src/libstd/sys/common/wtf8.rs +++ b/src/libstd/sys/common/wtf8.rs @@ -37,7 +37,6 @@ use hash::{Hash, Hasher}; use iter::FromIterator; use mem; use ops; -use rustc_unicode::str::{Utf16Item, utf16_items}; use slice; use str; use string::String; @@ -186,14 +185,14 @@ impl Wtf8Buf { /// will always return the original code units. pub fn from_wide(v: &[u16]) -> Wtf8Buf { let mut string = Wtf8Buf::with_capacity(v.len()); - for item in utf16_items(v) { + for item in char::decode_utf16(v.iter().cloned()) { match item { - Utf16Item::ScalarValue(c) => string.push_char(c), - Utf16Item::LoneSurrogate(s) => { + Ok(ch) => string.push_char(ch), + Err(surrogate) => { // Surrogates are known to be in the code point range. - let code_point = unsafe { CodePoint::from_u32_unchecked(s as u32) }; + let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; // Skip the WTF-8 concatenation check, - // surrogate pairs are already decoded by utf16_items + // surrogate pairs are already decoded by decode_utf16 string.push_code_point_unchecked(code_point) } } |
