diff options
| author | Keegan McAllister <kmcallister@mozilla.com> | 2014-04-11 12:49:31 -0700 |
|---|---|---|
| committer | Keegan McAllister <kmcallister@mozilla.com> | 2014-04-11 15:20:15 -0700 |
| commit | e011939b1af554d2a29947feb66f01e27a2a1524 (patch) | |
| tree | 8aafae4be0c043f239c04f247ae65473dd348d89 /src/libstd | |
| parent | 8b6091e8f1f5531fe907f84b6a2b27af04a95e8f (diff) | |
| download | rust-e011939b1af554d2a29947feb66f01e27a2a1524.tar.gz rust-e011939b1af554d2a29947feb66f01e27a2a1524.zip | |
Implement Char::encode_utf16
And clean up encode_utf8 a bit.
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/char.rs | 32 | ||||
| -rw-r--r-- | src/libstd/str.rs | 19 |
2 files changed, 32 insertions, 19 deletions
```diff
diff --git a/src/libstd/char.rs b/src/libstd/char.rs
index 702dbcca8be..7137ffadb09 100644
--- a/src/libstd/char.rs
+++ b/src/libstd/char.rs
@@ -560,11 +560,19 @@ pub trait Char {

     /// Encodes this character as UTF-8 into the provided byte buffer.
     ///
-    /// The buffer must be at least 4 bytes long or a runtime failure will
+    /// The buffer must be at least 4 bytes long or a runtime failure may
     /// occur.
     ///
-    /// This will then return the number of characters written to the slice.
+    /// This will then return the number of bytes written to the slice.
     fn encode_utf8(&self, dst: &mut [u8]) -> uint;
+
+    /// Encodes this character as UTF-16 into the provided `u16` buffer.
+    ///
+    /// The buffer must be at least 2 elements long or a runtime failure may
+    /// occur.
+    ///
+    /// This will then return the number of `u16`s written to the slice.
+    fn encode_utf16(&self, dst: &mut [u16]) -> uint;
 }

 impl Char for char {
@@ -602,7 +610,7 @@ impl Char for char {

     fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }

-    fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
+    fn encode_utf8(&self, dst: &mut [u8]) -> uint {
         let code = *self as uint;
         if code < MAX_ONE_B {
             dst[0] = code as u8;
@@ -624,6 +632,24 @@ impl Char for char {
             return 4;
         }
     }
+
+    fn encode_utf16(&self, dst: &mut [u16]) -> uint {
+        let mut ch = *self as uint;
+        if (ch & 0xFFFF_u) == ch {
+            // The BMP falls through (assuming non-surrogate, as it
+            // should)
+            assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
+            dst[0] = ch as u16;
+            1
+        } else {
+            // Supplementary planes break into surrogates.
+            assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
+            ch -= 0x1_0000_u;
+            dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+            dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+            2
+        }
+    }
 }

 #[cfg(not(test))]
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 904c19b40ed..449329ce63a 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -2555,22 +2555,9 @@ impl<'a> StrSlice<'a> for &'a str {
     fn to_utf16(&self) -> ~[u16] {
         let mut u = ~[];
         for ch in self.chars() {
-            // Arithmetic with u32 literals is easier on the eyes than chars.
-            let mut ch = ch as u32;
-
-            if (ch & 0xFFFF_u32) == ch {
-                // The BMP falls through (assuming non-surrogate, as it
-                // should)
-                assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
-                u.push(ch as u16)
-            } else {
-                // Supplementary planes break into surrogates.
-                assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
-                ch -= 0x1_0000_u32;
-                let w1 = 0xD800_u16 | ((ch >> 10) as u16);
-                let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
-                u.push_all([w1, w2])
-            }
+            let mut buf = [0u16, ..2];
+            let n = ch.encode_utf16(buf /* as mut slice! */);
+            u.push_all(buf.slice_to(n));
         }
         u
     }
```
