diff options
| author | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-14 01:44:15 +1000 |
|---|---|---|
| committer | Huon Wilson <dbau.pp+github@gmail.com> | 2013-06-16 10:50:28 +1000 |
| commit | f1886680e00850843e2524fba609ddba6a13180b (patch) | |
| tree | bb8ae7af1dab1088058e87436bd429aa4b4ace7e /src/libstd | |
| parent | 4b18fff2be74df9a2db5ee6ab418da322ad6ae18 (diff) | |
| download | rust-f1886680e00850843e2524fba609ddba6a13180b.tar.gz rust-f1886680e00850843e2524fba609ddba6a13180b.zip | |
std: convert str::to_utf16 to a method.
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/os.rs | 2 |
| -rw-r--r-- | src/libstd/str.rs | 55 |
2 files changed, 29 insertions, 28 deletions
```diff
diff --git a/src/libstd/os.rs b/src/libstd/os.rs
index 044b305a0dd..11572957198 100644
--- a/src/libstd/os.rs
+++ b/src/libstd/os.rs
@@ -144,7 +144,7 @@ pub mod win32 {
     }
 
     pub fn as_utf16_p<T>(s: &str, f: &fn(*u16) -> T) -> T {
-        let mut t = str::to_utf16(s);
+        let mut t = s.to_utf16();
         // Null terminate before passing on.
         t += [0u16];
         vec::as_imm_buf(t, |buf, _len| f(buf))
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index fbdbb1b3f74..d5beb755a27 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -759,30 +759,6 @@ pub fn is_utf16(v: &[u16]) -> bool {
     return true;
 }
 
-/// Converts to a vector of `u16` encoded as UTF-16
-pub fn to_utf16(s: &str) -> ~[u16] {
-    let mut u = ~[];
-    for s.iter().advance |ch| {
-        // Arithmetic with u32 literals is easier on the eyes than chars.
-        let mut ch = ch as u32;
-
-        if (ch & 0xFFFF_u32) == ch {
-            // The BMP falls through (assuming non-surrogate, as it
-            // should)
-            assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
-            u.push(ch as u16)
-        } else {
-            // Supplementary planes break into surrogates.
-            assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
-            ch -= 0x1_0000_u32;
-            let w1 = 0xD800_u16 | ((ch >> 10) as u16);
-            let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
-            u.push_all([w1, w2])
-        }
-    }
-    u
-}
-
 /// Iterates over the utf-16 characters in the specified slice, yielding each
 /// decoded unicode character to the function provided.
 ///
@@ -1188,6 +1164,7 @@ pub trait StrSlice<'self> {
     fn replace(&self, from: &str, to: &str) -> ~str;
     fn to_owned(&self) -> ~str;
     fn to_managed(&self) -> @str;
+    fn to_utf16(&self) -> ~[u16];
     fn is_char_boundary(&self, index: uint) -> bool;
     fn char_range_at(&self, start: uint) -> CharRange;
     fn char_at(&self, i: uint) -> char;
@@ -1602,6 +1579,30 @@ impl<'self> StrSlice<'self> for &'self str {
         unsafe { ::cast::transmute(v) }
     }
 
+    /// Converts to a vector of `u16` encoded as UTF-16.
+    fn to_utf16(&self) -> ~[u16] {
+        let mut u = ~[];
+        for self.iter().advance |ch| {
+            // Arithmetic with u32 literals is easier on the eyes than chars.
+            let mut ch = ch as u32;
+
+            if (ch & 0xFFFF_u32) == ch {
+                // The BMP falls through (assuming non-surrogate, as it
+                // should)
+                assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
+                u.push(ch as u16)
+            } else {
+                // Supplementary planes break into surrogates.
+                assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
+                ch -= 0x1_0000_u32;
+                let w1 = 0xD800_u16 | ((ch >> 10) as u16);
+                let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+                u.push_all([w1, w2])
+            }
+        }
+        u
+    }
+
     /**
      * Returns false if the index points into the middle of a multi-byte
      * character sequence.
@@ -3116,10 +3117,10 @@ mod tests {
 
         for pairs.each |p| {
             let (s, u) = copy *p;
-            assert!(to_utf16(s) == u);
+            assert!(s.to_utf16() == u);
             assert!(from_utf16(u) == s);
-            assert!(from_utf16(to_utf16(s)) == s);
-            assert!(to_utf16(from_utf16(u)) == u);
+            assert!(from_utf16(s.to_utf16()) == s);
+            assert!(from_utf16(u).to_utf16() == u);
         }
     }
 
```
