about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author: Simon Sapin <simon.sapin@exyr.org> 2015-08-13 18:39:46 +0200
committer: Simon Sapin <simon.sapin@exyr.org> 2015-08-23 00:28:56 +0200
commit6174b8d726ed5764694e5404329d8b5e66517ed5 (patch)
tree47dd9d787f7550a5d47301652fc5081446f6495e /src/libstd
parentc408b7863389aa2bdb253ffa363e693bcd02439f (diff)
downloadrust-6174b8d726ed5764694e5404329d8b5e66517ed5.tar.gz
rust-6174b8d726ed5764694e5404329d8b5e66517ed5.zip
Refactor low-level UTF-16 decoding.
* Rename `utf16_items` to `decode_utf16`. "Items" is meaningless.
* Move it to `rustc_unicode::char`, exposed in `std::char`.
* Generalize it to any `u16` iterable, not just `&[u16]`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enables using the `FromIterator for Result` impl.
* Add a `REPLACEMENT_CHARACTER` constant.
* Document how `result.unwrap_or(REPLACEMENT_CHARACTER)` replaces `Utf16Item::to_char_lossy`.
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/lib.rs 1
-rw-r--r-- src/libstd/sys/common/wtf8.rs 11
2 files changed, 6 insertions, 6 deletions
diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs
index 179f0727d46..fca4c66112e 100644
--- a/src/libstd/lib.rs
+++ b/src/libstd/lib.rs
@@ -242,6 +242,7 @@
 #![feature(unicode)]
 #![feature(unique)]
 #![feature(unsafe_no_drop_flag, filling_drop)]
+#![feature(decode_utf16)]
 #![feature(vec_push_all)]
 #![feature(vec_resize)]
 #![feature(wrapping)]
diff --git a/src/libstd/sys/common/wtf8.rs b/src/libstd/sys/common/wtf8.rs
index 9e4a80a411b..eb313d275a1 100644
--- a/src/libstd/sys/common/wtf8.rs
+++ b/src/libstd/sys/common/wtf8.rs
@@ -37,7 +37,6 @@ use hash::{Hash, Hasher};
 use iter::FromIterator;
 use mem;
 use ops;
-use rustc_unicode::str::{Utf16Item, utf16_items};
 use slice;
 use str;
 use string::String;
@@ -186,14 +185,14 @@ impl Wtf8Buf {
     /// will always return the original code units.
     pub fn from_wide(v: &[u16]) -> Wtf8Buf {
         let mut string = Wtf8Buf::with_capacity(v.len());
-        for item in utf16_items(v) {
+        for item in char::decode_utf16(v.iter().cloned()) {
             match item {
-                Utf16Item::ScalarValue(c) => string.push_char(c),
-                Utf16Item::LoneSurrogate(s) => {
+                Ok(ch) => string.push_char(ch),
+                Err(surrogate) => {
                     // Surrogates are known to be in the code point range.
-                    let code_point = unsafe { CodePoint::from_u32_unchecked(s as u32) };
+                    let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
                     // Skip the WTF-8 concatenation check,
-                    // surrogate pairs are already decoded by utf16_items
+                    // surrogate pairs are already decoded by decode_utf16
                     string.push_code_point_unchecked(code_point)
                 }
             }