From 955450212aac9c2babd6cb511974092224fcf93d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 5 Apr 2018 18:02:52 +0200 Subject: Move char decoding iterators into a separate private module. --- src/libcore/unicode/char.rs | 129 -------------------------------------------- 1 file changed, 129 deletions(-) (limited to 'src/libcore/unicode') diff --git a/src/libcore/unicode/char.rs b/src/libcore/unicode/char.rs index e75338aedf1..fda1914a50f 100644 --- a/src/libcore/unicode/char.rs +++ b/src/libcore/unicode/char.rs @@ -1435,132 +1435,3 @@ impl char { self.is_ascii() && (*self as u8).is_ascii_control() } } - -/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[derive(Clone, Debug)] -pub struct DecodeUtf16<I> - where I: Iterator<Item = u16> -{ - iter: I, - buf: Option<u16>, -} - -/// An error that can be returned when decoding UTF-16 code points. -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct DecodeUtf16Error { - code: u16, -} - -/// Create an iterator over the UTF-16 encoded code points in `iter`, -/// returning unpaired surrogates as `Err`s. 
-/// -/// # Examples -/// -/// Basic usage: -/// -/// ``` -/// use std::char::decode_utf16; -/// -/// fn main() { -/// // 𝄞music -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0xDD1E, 0x0069, 0x0063, -/// 0xD834]; -/// -/// assert_eq!(decode_utf16(v.iter().cloned()) -/// .map(|r| r.map_err(|e| e.unpaired_surrogate())) -/// .collect::<Vec<_>>(), -/// vec![Ok('𝄞'), -/// Ok('m'), Ok('u'), Ok('s'), -/// Err(0xDD1E), -/// Ok('i'), Ok('c'), -/// Err(0xD834)]); -/// } -/// ``` -/// -/// A lossy decoder can be obtained by replacing `Err` results with the replacement character: -/// -/// ``` -/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER}; -/// -/// fn main() { -/// // 𝄞music -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0xDD1E, 0x0069, 0x0063, -/// 0xD834]; -/// -/// assert_eq!(decode_utf16(v.iter().cloned()) -/// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)) -/// .collect::<String>(), -/// "𝄞mus�ic�"); -/// } -/// ``` -#[stable(feature = "decode_utf16", since = "1.9.0")] -#[inline] -pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> { - DecodeUtf16 { - iter: iter.into_iter(), - buf: None, - } -} - -#[stable(feature = "decode_utf16", since = "1.9.0")] -impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> { - type Item = Result<char, DecodeUtf16Error>; - - fn next(&mut self) -> Option<Result<char, DecodeUtf16Error>> { - let u = match self.buf.take() { - Some(buf) => buf, - None => self.iter.next()? - }; - - if u < 0xD800 || 0xDFFF < u { - // not a surrogate - Some(Ok(unsafe { from_u32_unchecked(u as u32) })) - } else if u >= 0xDC00 { - // a trailing surrogate - Some(Err(DecodeUtf16Error { code: u })) - } else { - let u2 = match self.iter.next() { - Some(u2) => u2, - // eof - None => return Some(Err(DecodeUtf16Error { code: u })), - }; - if u2 < 0xDC00 || u2 > 0xDFFF { - // not a trailing surrogate so we're not a valid - // surrogate pair, so rewind to redecode u2 next time. - self.buf = Some(u2); - return Some(Err(DecodeUtf16Error { code: u })); - } - - // all ok, so lets decode it. 
- let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000; - Some(Ok(unsafe { from_u32_unchecked(c) })) - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - let (low, high) = self.iter.size_hint(); - // we could be entirely valid surrogates (2 elements per - // char), or entirely non-surrogates (1 element per char) - (low / 2, high) - } -} - -impl DecodeUtf16Error { - /// Returns the unpaired surrogate which caused this error. - #[stable(feature = "decode_utf16", since = "1.9.0")] - pub fn unpaired_surrogate(&self) -> u16 { - self.code - } -} - -#[stable(feature = "decode_utf16", since = "1.9.0")] -impl fmt::Display for DecodeUtf16Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "unpaired surrogate found: {:x}", self.code) - } -} -- cgit 1.4.1-3-g733a5