about summary refs log tree commit diff
path: root/src/libcore/char.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcore/char.rs')
-rw-r--r-- src/libcore/char.rs 96
1 files changed, 58 insertions, 38 deletions
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index caac894c0da..0e6b634bd11 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -258,49 +258,69 @@ impl CharExt for char {
     #[inline]
     #[unstable = "pending decision about Iterator/Writer/Reader"]
     fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> {
-        // Marked #[inline] to allow llvm optimizing it away
-        let code = self as u32;
-        if code < MAX_ONE_B && dst.len() >= 1 {
-            dst[0] = code as u8;
-            Some(1)
-        } else if code < MAX_TWO_B && dst.len() >= 2 {
-            dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
-            dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
-            Some(2)
-        } else if code < MAX_THREE_B && dst.len() >= 3  {
-            dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
-            dst[1] = (code >>  6u & 0x3F_u32) as u8 | TAG_CONT;
-            dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
-            Some(3)
-        } else if dst.len() >= 4 {
-            dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
-            dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
-            dst[2] = (code >>  6u & 0x3F_u32) as u8 | TAG_CONT;
-            dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
-            Some(4)
-        } else {
-            None
-        }
+        encode_utf8_raw(self as u32, dst)
     }
 
     #[inline]
     #[unstable = "pending decision about Iterator/Writer/Reader"]
     fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> {
-        // Marked #[inline] to allow llvm optimizing it away
-        let mut ch = self as u32;
-        if (ch & 0xFFFF_u32) == ch  && dst.len() >= 1 {
-            // The BMP falls through (assuming non-surrogate, as it should)
-            dst[0] = ch as u16;
-            Some(1)
-        } else if dst.len() >= 2 {
-            // Supplementary planes break into surrogates.
-            ch -= 0x1_0000_u32;
-            dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
-            dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
-            Some(2)
-        } else {
-            None
-        }
+        encode_utf16_raw(self as u32, dst)
+    }
+}
+
+/// Encodes a raw `u32` as UTF-8 into `dst` and returns `Some(n)`, the count (1-4) of bytes written.
+/// The value is not validated; the 4-byte form keeps only its low 21 bits.
+///
+/// If `dst` is too short for the encoding, nothing is written into it
+/// and `None` is returned.
+#[inline]
+#[unstable]
+pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<uint> {
+    // Marked #[inline] so that LLVM can optimize the call away
+    if code < MAX_ONE_B && dst.len() >= 1 {
+        dst[0] = code as u8;
+        Some(1)
+    } else if code < MAX_TWO_B && dst.len() >= 2 {
+        dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
+        dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
+        Some(2)
+    } else if code < MAX_THREE_B && dst.len() >= 3  {
+        dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
+        dst[1] = (code >>  6u & 0x3F_u32) as u8 | TAG_CONT;
+        dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
+        Some(3)
+    } else if dst.len() >= 4 {
+        dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
+        dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
+        dst[2] = (code >>  6u & 0x3F_u32) as u8 | TAG_CONT;
+        dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
+        Some(4)
+    } else {
+        None
+    }
+}
+
+/// Encodes a raw `u32` as UTF-16 into `dst` and returns `Some(n)`, the count (1-2) of `u16`s written.
+/// The value is not validated: surrogates and values above 0x10FFFF are not rejected.
+///
+/// If `dst` is too short for the encoding, nothing is written into it
+/// and `None` is returned.
+#[inline]
+#[unstable]
+pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<uint> {
+    // Marked #[inline] so that LLVM can optimize the call away
+    if (ch & 0xFFFF_u32) == ch  && dst.len() >= 1 {
+        // Fits in 16 bits: stored unchanged as one unit (surrogate values are not checked for)
+        dst[0] = ch as u16;
+        Some(1)
+    } else if dst.len() >= 2 {
+        // Anything wider is offset by 0x1_0000 and split into a high/low surrogate pair.
+        ch -= 0x1_0000_u32;
+        dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+        dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+        Some(2)
+    } else {
+        None
     }
 }