about summary refs log tree commit diff
path: root/src/libcore
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcore')
-rw-r--r--src/libcore/char.rs189
-rw-r--r--src/libcore/fmt/mod.rs27
2 files changed, 132 insertions, 84 deletions
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 5b39fa42c6e..b2b1dc5178e 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -269,10 +269,10 @@ pub trait CharExt {
     fn len_utf8(self) -> usize;
     #[stable(feature = "core", since = "1.6.0")]
     fn len_utf16(self) -> usize;
-    #[stable(feature = "core", since = "1.6.0")]
-    fn encode_utf8(self, dst: &mut [u8]) -> Option<usize>;
-    #[stable(feature = "core", since = "1.6.0")]
-    fn encode_utf16(self, dst: &mut [u16]) -> Option<usize>;
+    #[unstable(feature = "unicode", issue = "27784")]
+    fn encode_utf8(self) -> EncodeUtf8;
+    #[unstable(feature = "unicode", issue = "27784")]
+    fn encode_utf16(self) -> EncodeUtf16;
 }
 
 #[stable(feature = "core", since = "1.6.0")]
@@ -336,75 +336,47 @@ impl CharExt for char {
     }
 
     #[inline]
-    fn encode_utf8(self, dst: &mut [u8]) -> Option<usize> {
-        encode_utf8_raw(self as u32, dst)
+    fn encode_utf8(self) -> EncodeUtf8 {
+        let code = self as u32;
+        let mut buf = [0; 4];
+        let pos = if code < MAX_ONE_B {
+            buf[3] = code as u8;
+            3
+        } else if code < MAX_TWO_B {
+            buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+            buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+            2
+        } else if code < MAX_THREE_B {
+            buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+            buf[2] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+            buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+            1
+        } else {
+            buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+            buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+            buf[2] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+            buf[3] = (code & 0x3F) as u8 | TAG_CONT;
+            0
+        };
+        EncodeUtf8 { buf: buf, pos: pos }
     }
 
     #[inline]
-    fn encode_utf16(self, dst: &mut [u16]) -> Option<usize> {
-        encode_utf16_raw(self as u32, dst)
-    }
-}
-
-/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
-/// and then returns the number of bytes written.
-///
-/// If the buffer is not large enough, nothing will be written into it
-/// and a `None` will be returned.
-#[inline]
-#[unstable(feature = "char_internals",
-           reason = "this function should not be exposed publicly",
-           issue = "0")]
-#[doc(hidden)]
-pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
-    // Marked #[inline] to allow llvm optimizing it away
-    if code < MAX_ONE_B && !dst.is_empty() {
-        dst[0] = code as u8;
-        Some(1)
-    } else if code < MAX_TWO_B && dst.len() >= 2 {
-        dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
-        dst[1] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(2)
-    } else if code < MAX_THREE_B && dst.len() >= 3  {
-        dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-        dst[1] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-        dst[2] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(3)
-    } else if dst.len() >= 4 {
-        dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
-        dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-        dst[2] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-        dst[3] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(4)
-    } else {
-        None
-    }
-}
-
-/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
-/// and then returns the number of `u16`s written.
-///
-/// If the buffer is not large enough, nothing will be written into it
-/// and a `None` will be returned.
-#[inline]
-#[unstable(feature = "char_internals",
-           reason = "this function should not be exposed publicly",
-           issue = "0")]
-#[doc(hidden)]
-pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
-    // Marked #[inline] to allow llvm optimizing it away
-    if (ch & 0xFFFF) == ch && !dst.is_empty() {
-        // The BMP falls through (assuming non-surrogate, as it should)
-        dst[0] = ch as u16;
-        Some(1)
-    } else if dst.len() >= 2 {
-        // Supplementary planes break into surrogates.
-        ch -= 0x1_0000;
-        dst[0] = 0xD800 | ((ch >> 10) as u16);
-        dst[1] = 0xDC00 | ((ch as u16) & 0x3FF);
-        Some(2)
-    } else {
-        None
+    fn encode_utf16(self) -> EncodeUtf16 {
+        let mut buf = [0; 2];
+        let mut code = self as u32;
+        let pos = if (code & 0xFFFF) == code {
+            // The BMP falls through (assuming non-surrogate, as it should)
+            buf[1] = code as u16;
+            1
+        } else {
+            // Supplementary planes break into surrogates.
+            code -= 0x1_0000;
+            buf[0] = 0xD800 | ((code >> 10) as u16);
+            buf[1] = 0xDC00 | ((code as u16) & 0x3FF);
+            0
+        };
+        EncodeUtf16 { buf: buf, pos: pos }
     }
 }
 
@@ -583,3 +555,80 @@ impl Iterator for EscapeDefault {
         }
     }
 }
+
+/// An iterator over `u8` entries represending the UTF-8 encoding of a `char`
+/// value.
+///
+/// Constructed via the `.encode_utf8()` method on `char`.
+#[unstable(feature = "unicode", issue = "27784")]
+#[derive(Debug)]
+pub struct EncodeUtf8 {
+    buf: [u8; 4],
+    pos: usize,
+}
+
+impl EncodeUtf8 {
+    /// Returns the remaining bytes of this iterator as a slice.
+    #[unstable(feature = "unicode", issue = "27784")]
+    pub fn as_slice(&self) -> &[u8] {
+        &self.buf[self.pos..]
+    }
+}
+
+#[unstable(feature = "unicode", issue = "27784")]
+impl Iterator for EncodeUtf8 {
+    type Item = u8;
+
+    fn next(&mut self) -> Option<u8> {
+        if self.pos == self.buf.len() {
+            None
+        } else {
+            let ret = Some(self.buf[self.pos]);
+            self.pos += 1;
+            ret
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.as_slice().iter().size_hint()
+    }
+}
+
+/// An iterator over `u16` entries represending the UTF-16 encoding of a `char`
+/// value.
+///
+/// Constructed via the `.encode_utf16()` method on `char`.
+#[unstable(feature = "unicode", issue = "27784")]
+#[derive(Debug)]
+pub struct EncodeUtf16 {
+    buf: [u16; 2],
+    pos: usize,
+}
+
+impl EncodeUtf16 {
+    /// Returns the remaining bytes of this iterator as a slice.
+    #[unstable(feature = "unicode", issue = "27784")]
+    pub fn as_slice(&self) -> &[u16] {
+        &self.buf[self.pos..]
+    }
+}
+
+
+#[unstable(feature = "unicode", issue = "27784")]
+impl Iterator for EncodeUtf16 {
+    type Item = u16;
+
+    fn next(&mut self) -> Option<u16> {
+        if self.pos == self.buf.len() {
+            None
+        } else {
+            let ret = Some(self.buf[self.pos]);
+            self.pos += 1;
+            ret
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.as_slice().iter().size_hint()
+    }
+}
diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs
index d2da16624ca..5617b6d63a7 100644
--- a/src/libcore/fmt/mod.rs
+++ b/src/libcore/fmt/mod.rs
@@ -99,9 +99,9 @@ pub trait Write {
     /// This function will return an instance of `Error` on error.
     #[stable(feature = "fmt_write_char", since = "1.1.0")]
     fn write_char(&mut self, c: char) -> Result {
-        let mut utf_8 = [0u8; 4];
-        let bytes_written = c.encode_utf8(&mut utf_8).unwrap_or(0);
-        self.write_str(unsafe { str::from_utf8_unchecked(&utf_8[..bytes_written]) })
+        self.write_str(unsafe {
+            str::from_utf8_unchecked(c.encode_utf8().as_slice())
+        })
     }
 
     /// Glue for usage of the `write!` macro with implementors of this trait.
@@ -897,10 +897,9 @@ impl<'a> Formatter<'a> {
         // Writes the sign if it exists, and then the prefix if it was requested
         let write_prefix = |f: &mut Formatter| {
             if let Some(c) = sign {
-                let mut b = [0; 4];
-                let n = c.encode_utf8(&mut b).unwrap_or(0);
-                let b = unsafe { str::from_utf8_unchecked(&b[..n]) };
-                try!(f.buf.write_str(b));
+                try!(f.buf.write_str(unsafe {
+                    str::from_utf8_unchecked(c.encode_utf8().as_slice())
+                }));
             }
             if prefixed { f.buf.write_str(prefix) }
             else { Ok(()) }
@@ -1003,9 +1002,10 @@ impl<'a> Formatter<'a> {
             rt::v1::Alignment::Center => (padding / 2, (padding + 1) / 2),
         };
 
-        let mut fill = [0; 4];
-        let len = self.fill.encode_utf8(&mut fill).unwrap_or(0);
-        let fill = unsafe { str::from_utf8_unchecked(&fill[..len]) };
+        let fill = self.fill.encode_utf8();
+        let fill = unsafe {
+            str::from_utf8_unchecked(fill.as_slice())
+        };
 
         for _ in 0..pre_pad {
             try!(self.buf.write_str(fill));
@@ -1391,10 +1391,9 @@ impl Display for char {
         if f.width.is_none() && f.precision.is_none() {
             f.write_char(*self)
         } else {
-            let mut utf8 = [0; 4];
-            let amt = self.encode_utf8(&mut utf8).unwrap_or(0);
-            let s: &str = unsafe { str::from_utf8_unchecked(&utf8[..amt]) };
-            f.pad(s)
+            f.pad(unsafe {
+                str::from_utf8_unchecked(self.encode_utf8().as_slice())
+            })
         }
     }
 }