about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
author Keegan McAllister <kmcallister@mozilla.com> 2014-04-11 12:49:31 -0700
committer Keegan McAllister <kmcallister@mozilla.com> 2014-04-11 15:20:15 -0700
commit e011939b1af554d2a29947feb66f01e27a2a1524 (patch)
tree 8aafae4be0c043f239c04f247ae65473dd348d89 /src/libstd
parent 8b6091e8f1f5531fe907f84b6a2b27af04a95e8f (diff)
download rust-e011939b1af554d2a29947feb66f01e27a2a1524.tar.gz
rust-e011939b1af554d2a29947feb66f01e27a2a1524.zip
Implement Char::encode_utf16
And clean up encode_utf8 a bit.
Diffstat (limited to 'src/libstd')
-rw-r--r-- src/libstd/char.rs 32
-rw-r--r-- src/libstd/str.rs 19
2 files changed, 32 insertions, 19 deletions
diff --git a/src/libstd/char.rs b/src/libstd/char.rs
index 702dbcca8be..7137ffadb09 100644
--- a/src/libstd/char.rs
+++ b/src/libstd/char.rs
@@ -560,11 +560,19 @@ pub trait Char {
 
     /// Encodes this character as UTF-8 into the provided byte buffer.
     ///
-    /// The buffer must be at least 4 bytes long or a runtime failure will
+    /// The buffer must be at least 4 bytes long or a runtime failure may
     /// occur.
     ///
-    /// This will then return the number of characters written to the slice.
+    /// This will then return the number of bytes written to the slice.
     fn encode_utf8(&self, dst: &mut [u8]) -> uint;
+
+    /// Encodes this character as UTF-16 into the provided `u16` buffer.
+    ///
+    /// The buffer must be at least 2 elements long or a runtime failure may
+    /// occur.
+    ///
+    /// This will then return the number of `u16`s written to the slice.
+    fn encode_utf16(&self, dst: &mut [u16]) -> uint;
 }
 
 impl Char for char {
@@ -602,7 +610,7 @@ impl Char for char {
 
     fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
 
-    fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
+    fn encode_utf8(&self, dst: &mut [u8]) -> uint {
         let code = *self as uint;
         if code < MAX_ONE_B {
             dst[0] = code as u8;
@@ -624,6 +632,24 @@ impl Char for char {
             return 4;
         }
     }
+
+    fn encode_utf16(&self, dst: &mut [u16]) -> uint {
+        let mut ch = *self as uint;
+        if (ch & 0xFFFF_u) == ch {
+            // The BMP falls through (assuming non-surrogate, as it
+            // should)
+            assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
+            dst[0] = ch as u16;
+            1
+        } else {
+            // Supplementary planes break into surrogates.
+            assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
+            ch -= 0x1_0000_u;
+            dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+            dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+            2
+        }
+    }
 }
 
 #[cfg(not(test))]
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 904c19b40ed..449329ce63a 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -2555,22 +2555,9 @@ impl<'a> StrSlice<'a> for &'a str {
     fn to_utf16(&self) -> ~[u16] {
         let mut u = ~[];
         for ch in self.chars() {
-            // Arithmetic with u32 literals is easier on the eyes than chars.
-            let mut ch = ch as u32;
-
-            if (ch & 0xFFFF_u32) == ch {
-                // The BMP falls through (assuming non-surrogate, as it
-                // should)
-                assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
-                u.push(ch as u16)
-            } else {
-                // Supplementary planes break into surrogates.
-                assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
-                ch -= 0x1_0000_u32;
-                let w1 = 0xD800_u16 | ((ch >> 10) as u16);
-                let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
-                u.push_all([w1, w2])
-            }
+            let mut buf = [0u16, ..2];
+            let n = ch.encode_utf16(buf /* as mut slice! */);
+            u.push_all(buf.slice_to(n));
         }
         u
     }