diff options
| author | bors <bors@rust-lang.org> | 2014-11-21 14:21:48 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2014-11-21 14:21:48 +0000 |
| commit | 9efa23e9c0d98d47612eb6cf40fdf405f9c98b65 (patch) | |
| tree | 7bb764af5e6dacf3173643be5c4461ab98dc5b8c | |
| parent | 47c1d437c93872e6776570beac839f2587782f94 (diff) | |
| parent | dff48a99d6bf9d2c4828f187bfa2c9ba382fe791 (diff) | |
| download | rust-9efa23e9c0d98d47612eb6cf40fdf405f9c98b65.tar.gz rust-9efa23e9c0d98d47612eb6cf40fdf405f9c98b65.zip | |
auto merge of #19042 : SimonSapin/rust/generic-utf16-encoder, r=alexcrichton
This allows encoding to UTF-16 from any iterator of `char`s rather than only from a UTF-8 string, e.g. from a `[char]` UTF-32 string. This might help with servo/servo#4023.
| -rw-r--r-- | src/libcollections/str.rs | 3 | ||||
| -rw-r--r-- | src/libcore/str.rs | 28 | ||||
| -rw-r--r-- | src/libcoretest/str.rs | 7 |
3 files changed, 34 insertions, 4 deletions
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index a7df5f4644a..9c93669b5ac 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -74,7 +74,8 @@ use vec::Vec; pub use core::str::{from_utf8, CharEq, Chars, CharOffsets}; pub use core::str::{Bytes, CharSplits}; pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits}; -pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items}; +pub use core::str::{Utf16Encoder, Utf16CodeUnits}; +pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items}; pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items}; pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange}; pub use core::str::{FromStr, from_str}; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 24f26b15f27..68e490ecb19 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -762,12 +762,34 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> { /// Use with the `std::iter` module. #[deriving(Clone)] pub struct Utf16CodeUnits<'a> { - chars: Chars<'a>, - extra: u16 + encoder: Utf16Encoder<Chars<'a>> } impl<'a> Iterator<u16> for Utf16CodeUnits<'a> { #[inline] + fn next(&mut self) -> Option<u16> { self.encoder.next() } + + #[inline] + fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() } +} + + +/// Iterator adaptor for encoding `char`s to UTF-16. +#[deriving(Clone)] +pub struct Utf16Encoder<I> { + chars: I, + extra: u16 +} + +impl<I> Utf16Encoder<I> { + /// Create an UTF-16 encoder from any `char` iterator. 
+ pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> { + Utf16Encoder { chars: chars, extra: 0 } + } +} + +impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> { + #[inline] fn next(&mut self) -> Option<u16> { if self.extra != 0 { let tmp = self.extra; @@ -2225,7 +2247,7 @@ impl StrPrelude for str { #[inline] fn utf16_units(&self) -> Utf16CodeUnits { - Utf16CodeUnits{ chars: self.chars(), extra: 0} + Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) } } #[inline] diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index 5f44fd807cc..9574aeb3762 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -114,3 +114,10 @@ fn test_rev_split_char_iterator_no_trailing() { split.reverse(); assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]); } + +#[test] +fn test_utf16_code_units() { + use core::str::Utf16Encoder; + assert_eq!(Utf16Encoder::new(vec!['é', '\U0001F4A9'].into_iter()).collect::<Vec<u16>>(), + vec![0xE9, 0xD83D, 0xDCA9]) +} |
