diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2018-04-06 10:24:01 +0200 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2018-04-12 00:13:53 +0200 |
| commit | d4ed1e6fa4978141408ef01d0d35c7bd142dd164 (patch) | |
| tree | 4943e332478a1658581eb8872ff7be92b1d1b499 /src | |
| parent | 0d9afcd9b9f881545c8b722855f7e39361495d27 (diff) | |
| download | rust-d4ed1e6fa4978141408ef01d0d35c7bd142dd164.tar.gz rust-d4ed1e6fa4978141408ef01d0d35c7bd142dd164.zip | |
Merge unstable Utf16Encoder into EncodeUtf16
Diffstat (limited to 'src')
| -rw-r--r-- | src/liballoc/str.rs | 27 | ||||
| -rw-r--r-- | src/liballoc/tests/str.rs | 3 | ||||
| -rw-r--r-- | src/libcore/unicode/mod.rs | 58 |
3 files changed, 23 insertions, 65 deletions
diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 0b961c2c186..65df93bd3bb 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -45,7 +45,6 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; use core::ptr; use core::iter::FusedIterator; -use core::unicode::Utf16Encoder; use vec_deque::VecDeque; use borrow::{Borrow, ToOwned}; @@ -146,7 +145,8 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] { #[derive(Clone)] #[stable(feature = "encode_utf16", since = "1.8.0")] pub struct EncodeUtf16<'a> { - encoder: Utf16Encoder<Chars<'a>>, + chars: Chars<'a>, + extra: u16, } #[stable(feature = "collection_debug", since = "1.17.0")] @@ -162,12 +162,29 @@ impl<'a> Iterator for EncodeUtf16<'a> { #[inline] fn next(&mut self) -> Option<u16> { - self.encoder.next() + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.chars.next().map(|ch| { + let n = ch.encode_utf16(&mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) } #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - self.encoder.size_hint() + let (low, high) = self.chars.size_hint(); + // every char gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. + (low, high.and_then(|n| n.checked_mul(2))) } } @@ -870,7 +887,7 @@ impl str { /// ``` #[stable(feature = "encode_utf16", since = "1.8.0")] pub fn encode_utf16(&self) -> EncodeUtf16 { - EncodeUtf16 { encoder: Utf16Encoder::new(self[..].chars()) } + EncodeUtf16 { chars: self[..].chars(), extra: 0 } } /// Returns `true` if the given pattern matches a sub-slice of diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 2df8ca63a3e..a3f4c385fe2 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1204,8 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() { #[test] fn test_utf16_code_units() { - use core::unicode::Utf16Encoder; - assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), + assert_eq!("é\u{1F4A9}".encode_utf16().collect::<Vec<u16>>(), [0xE9, 0xD83D, 0xDCA9]) } diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 3413476fd22..9ab8cb748b1 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -24,61 +24,3 @@ pub mod derived_property { pub mod property { pub use unicode::tables::property::Pattern_White_Space; } - -use iter::FusedIterator; - -/// Iterator adaptor for encoding `char`s to UTF-16. -#[derive(Clone)] -#[allow(missing_debug_implementations)] -pub struct Utf16Encoder<I> { - chars: I, - extra: u16, -} - -impl<I> Utf16Encoder<I> { - /// Create a UTF-16 encoder from any `char` iterator. - pub fn new(chars: I) -> Utf16Encoder<I> - where I: Iterator<Item = char> - { - Utf16Encoder { - chars, - extra: 0, - } - } -} - -impl<I> Iterator for Utf16Encoder<I> - where I: Iterator<Item = char> -{ - type Item = u16; - - #[inline] - fn next(&mut self) -> Option<u16> { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); - } - - let mut buf = [0; 2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(&mut buf).len(); - if n == 2 { - self.extra = buf[1]; - } - buf[0] - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - let (low, high) = self.chars.size_hint(); - // every char gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low, high.and_then(|n| n.checked_mul(2))) - } -} - -impl<I> FusedIterator for Utf16Encoder<I> - where I: FusedIterator<Item = char> {} |
