diff options
Diffstat (limited to 'src/libcore/char.rs')
| -rw-r--r-- | src/libcore/char.rs | 96 |
1 files changed, 58 insertions, 38 deletions
diff --git a/src/libcore/char.rs b/src/libcore/char.rs index caac894c0da..0e6b634bd11 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -258,49 +258,69 @@ impl CharExt for char { #[inline] #[unstable = "pending decision about Iterator/Writer/Reader"] fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> { - // Marked #[inline] to allow llvm optimizing it away - let code = self as u32; - if code < MAX_ONE_B && dst.len() >= 1 { - dst[0] = code as u8; - Some(1) - } else if code < MAX_TWO_B && dst.len() >= 2 { - dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; - dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(2) - } else if code < MAX_THREE_B && dst.len() >= 3 { - dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B; - dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(3) - } else if dst.len() >= 4 { - dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B; - dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(4) - } else { - None - } + encode_utf8_raw(self as u32, dst) } #[inline] #[unstable = "pending decision about Iterator/Writer/Reader"] fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> { - // Marked #[inline] to allow llvm optimizing it away - let mut ch = self as u32; - if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { - // The BMP falls through (assuming non-surrogate, as it should) - dst[0] = ch as u16; - Some(1) - } else if dst.len() >= 2 { - // Supplementary planes break into surrogates. - ch -= 0x1_0000_u32; - dst[0] = 0xD800_u16 | ((ch >> 10) as u16); - dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); - Some(2) - } else { - None - } + encode_utf16_raw(self as u32, dst) + } +} + +/// Encodes a raw u32 value as UTF-8 into the provided byte buffer, +/// and then returns the number of bytes written. +/// +/// If the buffer is not large enough, nothing will be written into it +/// and a `None` will be returned. +#[inline] +#[unstable] +pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<uint> { + // Marked #[inline] to allow llvm optimizing it away + if code < MAX_ONE_B && dst.len() >= 1 { + dst[0] = code as u8; + Some(1) + } else if code < MAX_TWO_B && dst.len() >= 2 { + dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; + dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT; + Some(2) + } else if code < MAX_THREE_B && dst.len() >= 3 { + dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B; + dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; + dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT; + Some(3) + } else if dst.len() >= 4 { + dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B; + dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT; + dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; + dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT; + Some(4) + } else { + None + } +} + +/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer, +/// and then returns the number of `u16`s written. +/// +/// If the buffer is not large enough, nothing will be written into it +/// and a `None` will be returned. +#[inline] +#[unstable] +pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<uint> { + // Marked #[inline] to allow llvm optimizing it away + if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { + // The BMP falls through (assuming non-surrogate, as it should) + dst[0] = ch as u16; + Some(1) + } else if dst.len() >= 2 { + // Supplementary planes break into surrogates. + ch -= 0x1_0000_u32; + dst[0] = 0xD800_u16 | ((ch >> 10) as u16); + dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); + Some(2) + } else { + None } } |
