Refactor low-level UTF-16 decoding.

* Rename `utf16_items` to `decode_utf16`. "Items" is meaningless.
* Move it to `rustc_unicode::char`, exposed in `std::char`.
* Generalize it to any `u16` iterable, not just `&[u16]`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enables using the `FromIterator for Result` impl.
* Add a `REPLACEMENT_CHARACTER` constant.
* Document how `result.unwrap_or(REPLACEMENT_CHARACTER)` replaces `Utf16Item::to_char_lossy`.
author: Simon Sapin <simon.sapin@exyr.org> 2015-08-13 18:39:46 +0200
committer: Simon Sapin <simon.sapin@exyr.org> 2015-08-23 00:28:56 +0200
commit: 6174b8d726ed5764694e5404329d8b5e66517ed5 (patch)
tree: 47dd9d787f7550a5d47301652fc5081446f6495e /src/libstd
parent: c408b7863389aa2bdb253ffa363e693bcd02439f (diff)
download: rust-6174b8d726ed5764694e5404329d8b5e66517ed5.tar.gz
rust-6174b8d726ed5764694e5404329d8b5e66517ed5.zip
2 files changed, 6 insertions, 6 deletions
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index 179f0727d46..fca4c66112e 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -242,6 +242,7 @@
 #![feature(unicode)]
 #![feature(unique)]
 #![feature(unsafe_no_drop_flag, filling_drop)]
+#![feature(decode_utf16)]
 #![feature(vec_push_all)]
 #![feature(vec_resize)]
 #![feature(wrapping)]
diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs
index 9e4a80a411b..eb313d275a1 100644
--- a/src/libstd/sys/common/wtf8.rs
+++ b/src/libstd/sys/common/wtf8.rs
@@ -37,7 +37,6 @@ use hash::{Hash, Hasher};
 use iter::FromIterator;
 use mem;
 use ops;
-use rustc_unicode::str::{Utf16Item, utf16_items};
 use slice;
 use str;
 use string::String;
@@ -186,14 +185,14 @@ impl Wtf8Buf {
     /// will always return the original code units.
     pub fn from_wide(v: &[u16]) -> Wtf8Buf {
         let mut string = Wtf8Buf::with_capacity(v.len());
-        for item in utf16_items(v) {
+        for item in char::decode_utf16(v.iter().cloned()) {
             match item {
-                Utf16Item::ScalarValue(c) => string.push_char(c),
-                Utf16Item::LoneSurrogate(s) => {
+                Ok(ch) => string.push_char(ch),
+                Err(surrogate) => {
                     // Surrogates are known to be in the code point range.
-                    let code_point = unsafe { CodePoint::from_u32_unchecked(s as u32) };
+                    let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
                     // Skip the WTF-8 concatenation check,
-                    // surrogate pairs are already decoded by utf16_items
+                    // surrogate pairs are already decoded by decode_utf16
                     string.push_code_point_unchecked(code_point)
                 }
             }
author	Simon Sapin <simon.sapin@exyr.org>	2015-08-13 18:39:46 +0200
committer	Simon Sapin <simon.sapin@exyr.org>	2015-08-23 00:28:56 +0200
commit	6174b8d726ed5764694e5404329d8b5e66517ed5 (patch)
tree	47dd9d787f7550a5d47301652fc5081446f6495e /src/libstd
parent	c408b7863389aa2bdb253ffa363e693bcd02439f (diff)
download	rust-6174b8d726ed5764694e5404329d8b5e66517ed5.tar.gz rust-6174b8d726ed5764694e5404329d8b5e66517ed5.zip