diff options
| author | Simon Sapin <simon.sapin@exyr.org> | 2018-04-05 18:36:02 +0200 |
|---|---|---|
| committer | Simon Sapin <simon.sapin@exyr.org> | 2018-04-12 00:13:52 +0200 |
| commit | 34c52534f72f035b898efe3b86028741576f1499 (patch) | |
| tree | a9880390ee9eb3215612bc4e574851643ec9b47f | |
| parent | 1800d695b9bd2c256f2d081da07a94e7a6cba832 (diff) | |
| download | rust-34c52534f72f035b898efe3b86028741576f1499.tar.gz rust-34c52534f72f035b898efe3b86028741576f1499.zip | |
Move the rest of core::unicode::char to core::unicode
| -rw-r--r-- | src/libcore/char/methods.rs (renamed from src/libcore/unicode/char.rs) | 251 | ||||
| -rw-r--r-- | src/libcore/char/mod.rs | 288 | ||||
| -rw-r--r-- | src/libcore/unicode/mod.rs | 1 |
3 files changed, 266 insertions, 274 deletions
diff --git a/src/libcore/unicode/char.rs b/src/libcore/char/methods.rs index fda1914a50f..0958c67ea05 100644 --- a/src/libcore/unicode/char.rs +++ b/src/libcore/char/methods.rs @@ -8,150 +8,155 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! A character type. -//! -//! The `char` type represents a single character. More specifically, since -//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode -//! scalar value]', which is similar to, but not the same as, a '[Unicode code -//! point]'. -//! -//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value -//! [Unicode code point]: http://www.unicode.org/glossary/#code_point -//! -//! This module exists for technical reasons, the primary documentation for -//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) -//! itself. -//! -//! This module is the home of the iterator implementations for the iterators -//! implemented on `char`, as well as some useful constants and conversion -//! functions that convert various types to `char`. +//! impl char {} -#![stable(feature = "rust1", since = "1.0.0")] - -use char::*; -use char::CharExt as C; -use iter::FusedIterator; -use fmt::{self, Write}; +use slice; +use str::from_utf8_unchecked_mut; +use super::*; +use super::CharExt as C; +use super::printable::is_printable; use unicode::tables::{conversions, derived_property, general_category, property}; -/// Returns an iterator that yields the lowercase equivalent of a `char`. -/// -/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See -/// its documentation for more. -/// -/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase -/// [`char`]: ../../std/primitive.char.html -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Debug, Clone)] -pub struct ToLowercase(CaseMappingIter); +#[stable(feature = "core", since = "1.6.0")] +impl CharExt for char { + #[inline] + fn is_digit(self, radix: u32) -> bool { + self.to_digit(radix).is_some() + } -#[stable(feature = "rust1", since = "1.0.0")] -impl Iterator for ToLowercase { - type Item = char; - fn next(&mut self) -> Option<char> { - self.0.next() + #[inline] + fn to_digit(self, radix: u32) -> Option<u32> { + if radix > 36 { + panic!("to_digit: radix is too high (maximum 36)"); + } + let val = match self { + '0' ... '9' => self as u32 - '0' as u32, + 'a' ... 'z' => self as u32 - 'a' as u32 + 10, + 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, + _ => return None, + }; + if val < radix { Some(val) } + else { None } } -} -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for ToLowercase {} + #[inline] + fn escape_unicode(self) -> EscapeUnicode { + let c = self as u32; -/// Returns an iterator that yields the uppercase equivalent of a `char`. -/// -/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See -/// its documentation for more. -/// -/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase -/// [`char`]: ../../std/primitive.char.html -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Debug, Clone)] -pub struct ToUppercase(CaseMappingIter); + // or-ing 1 ensures that for c==0 the code computes that one + // digit should be printed and (which is the same) avoids the + // (31 - 32) underflow + let msb = 31 - (c | 1).leading_zeros(); -#[stable(feature = "rust1", since = "1.0.0")] -impl Iterator for ToUppercase { - type Item = char; - fn next(&mut self) -> Option<char> { - self.0.next() + // the index of the most significant hex digit + let ms_hex_digit = msb / 4; + EscapeUnicode { + c: self, + state: EscapeUnicodeState::Backslash, + hex_digit_idx: ms_hex_digit as usize, + } } -} -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for ToUppercase {} + #[inline] + fn escape_default(self) -> EscapeDefault { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + '\x20' ... '\x7e' => EscapeDefaultState::Char(self), + _ => EscapeDefaultState::Unicode(self.escape_unicode()) + }; + EscapeDefault { state: init_state } + } -#[derive(Debug, Clone)] -enum CaseMappingIter { - Three(char, char, char), - Two(char, char), - One(char), - Zero, -} + #[inline] + fn escape_debug(self) -> EscapeDebug { + let init_state = match self { + '\t' => EscapeDefaultState::Backslash('t'), + '\r' => EscapeDefaultState::Backslash('r'), + '\n' => EscapeDefaultState::Backslash('n'), + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), + c if is_printable(c) => EscapeDefaultState::Char(c), + c => EscapeDefaultState::Unicode(c.escape_unicode()), + }; + EscapeDebug(EscapeDefault { state: init_state }) + } -impl CaseMappingIter { - fn new(chars: [char; 3]) -> CaseMappingIter { - if chars[2] == '\0' { - if chars[1] == '\0' { - CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0' - } else { - CaseMappingIter::Two(chars[0], chars[1]) - } + #[inline] + fn len_utf8(self) -> usize { + let code = self as u32; + if code < MAX_ONE_B { + 1 + } else if code < MAX_TWO_B { + 2 + } else if code < MAX_THREE_B { + 3 } else { - CaseMappingIter::Three(chars[0], chars[1], chars[2]) + 4 } } -} -impl Iterator for CaseMappingIter { - type Item = char; - fn next(&mut self) -> Option<char> { - match *self { - CaseMappingIter::Three(a, b, c) => { - *self = CaseMappingIter::Two(b, c); - Some(a) - } - CaseMappingIter::Two(b, c) => { - *self = CaseMappingIter::One(c); - Some(b) - } - CaseMappingIter::One(c) => { - *self = CaseMappingIter::Zero; - Some(c) - } - CaseMappingIter::Zero => None, - } + #[inline] + fn len_utf16(self) -> usize { + let ch = self as u32; + if (ch & 0xFFFF) == ch { 1 } else { 2 } } -} -impl fmt::Display for CaseMappingIter { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - CaseMappingIter::Three(a, b, c) => { - f.write_char(a)?; - f.write_char(b)?; - f.write_char(c) - } - CaseMappingIter::Two(b, c) => { - f.write_char(b)?; - f.write_char(c) - } - CaseMappingIter::One(c) => { - f.write_char(c) - } - CaseMappingIter::Zero => Ok(()), + #[inline] + fn encode_utf8(self, dst: &mut [u8]) -> &mut str { + let code = self as u32; + unsafe { + let len = + if code < MAX_ONE_B && !dst.is_empty() { + *dst.get_unchecked_mut(0) = code as u8; + 1 + } else if code < MAX_TWO_B && dst.len() >= 2 { + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; + 2 + } else if code < MAX_THREE_B && dst.len() >= 3 { + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; + 3 + } else if dst.len() >= 4 { + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; + 4 + } else { + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf8(), + code, + dst.len()) + }; + from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) } } -} - -#[stable(feature = "char_struct_display", since = "1.16.0")] -impl fmt::Display for ToLowercase { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} -#[stable(feature = "char_struct_display", since = "1.16.0")] -impl fmt::Display for ToUppercase { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.0, f) + #[inline] + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { + let mut code = self as u32; + unsafe { + if (code & 0xFFFF) == code && !dst.is_empty() { + // The BMP falls through (assuming non-surrogate, as it should) + *dst.get_unchecked_mut(0) = code as u16; + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) + } else if dst.len() >= 2 { + // Supplementary planes break into surrogates. + code -= 0x1_0000; + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) + } else { + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", + from_u32_unchecked(code).len_utf16(), + code, + dst.len()) + } + } } } diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs index 01a7b49ac74..7b4f0dc4548 100644 --- a/src/libcore/char/mod.rs +++ b/src/libcore/char/mod.rs @@ -8,15 +8,30 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Character manipulation. +//! A character type. //! -//! For more details, see ::core::unicode::char (a.k.a. std::char) +//! The `char` type represents a single character. More specifically, since +//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode +//! scalar value]', which is similar to, but not the same as, a '[Unicode code +//! point]'. +//! +//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value +//! [Unicode code point]: http://www.unicode.org/glossary/#code_point +//! +//! This module exists for technical reasons, the primary documentation for +//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) +//! itself. +//! +//! This module is the home of the iterator implementations for the iterators +//! implemented on `char`, as well as some useful constants and conversion +//! functions that convert various types to `char`. #![allow(non_snake_case)] #![stable(feature = "core_char", since = "1.2.0")] mod convert; mod decode; +mod methods; mod printable; // stable re-exports @@ -28,8 +43,6 @@ pub use self::convert::from_u32_unchecked; pub use self::convert::ParseCharError; #[stable(feature = "try_from", since = "1.26.0")] pub use self::convert::CharTryFromError; -#[stable(feature = "rust1", since = "1.0.0")] -pub use unicode::char::{ToLowercase, ToUppercase}; #[stable(feature = "decode_utf16", since = "1.9.0")] pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error}; @@ -41,10 +54,7 @@ pub use unicode::version::UnicodeVersion; #[unstable(feature = "decode_utf8", issue = "33906")] pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence}; -use self::printable::is_printable; use fmt::{self, Write}; -use slice; -use str::from_utf8_unchecked_mut; use iter::FusedIterator; // UTF-8 ranges and tags for encoding characters @@ -137,149 +147,6 @@ pub trait CharExt { fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16]; } -#[stable(feature = "core", since = "1.6.0")] -impl CharExt for char { - #[inline] - fn is_digit(self, radix: u32) -> bool { - self.to_digit(radix).is_some() - } - - #[inline] - fn to_digit(self, radix: u32) -> Option<u32> { - if radix > 36 { - panic!("to_digit: radix is too high (maximum 36)"); - } - let val = match self { - '0' ... '9' => self as u32 - '0' as u32, - 'a' ... 'z' => self as u32 - 'a' as u32 + 10, - 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, - _ => return None, - }; - if val < radix { Some(val) } - else { None } - } - - #[inline] - fn escape_unicode(self) -> EscapeUnicode { - let c = self as u32; - - // or-ing 1 ensures that for c==0 the code computes that one - // digit should be printed and (which is the same) avoids the - // (31 - 32) underflow - let msb = 31 - (c | 1).leading_zeros(); - - // the index of the most significant hex digit - let ms_hex_digit = msb / 4; - EscapeUnicode { - c: self, - state: EscapeUnicodeState::Backslash, - hex_digit_idx: ms_hex_digit as usize, - } - } - - #[inline] - fn escape_default(self) -> EscapeDefault { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - '\x20' ... '\x7e' => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()) - }; - EscapeDefault { state: init_state } - } - - #[inline] - fn escape_debug(self) -> EscapeDebug { - let init_state = match self { - '\t' => EscapeDefaultState::Backslash('t'), - '\r' => EscapeDefaultState::Backslash('r'), - '\n' => EscapeDefaultState::Backslash('n'), - '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - c if is_printable(c) => EscapeDefaultState::Char(c), - c => EscapeDefaultState::Unicode(c.escape_unicode()), - }; - EscapeDebug(EscapeDefault { state: init_state }) - } - - #[inline] - fn len_utf8(self) -> usize { - let code = self as u32; - if code < MAX_ONE_B { - 1 - } else if code < MAX_TWO_B { - 2 - } else if code < MAX_THREE_B { - 3 - } else { - 4 - } - } - - #[inline] - fn len_utf16(self) -> usize { - let ch = self as u32; - if (ch & 0xFFFF) == ch { 1 } else { 2 } - } - - #[inline] - fn encode_utf8(self, dst: &mut [u8]) -> &mut str { - let code = self as u32; - unsafe { - let len = - if code < MAX_ONE_B && !dst.is_empty() { - *dst.get_unchecked_mut(0) = code as u8; - 1 - } else if code < MAX_TWO_B && dst.len() >= 2 { - *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B && dst.len() >= 3 { - *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; - 3 - } else if dst.len() >= 4 { - *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; - 4 - } else { - panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf8(), - code, - dst.len()) - }; - from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) - } - } - - #[inline] - fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { - let mut code = self as u32; - unsafe { - if (code & 0xFFFF) == code && !dst.is_empty() { - // The BMP falls through (assuming non-surrogate, as it should) - *dst.get_unchecked_mut(0) = code as u16; - slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) - } else if dst.len() >= 2 { - // Supplementary planes break into surrogates. - code -= 0x1_0000; - *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); - *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); - slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) - } else { - panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", - from_u32_unchecked(code).len_utf16(), - code, - dst.len()) - } - } - } -} - /// Returns an iterator that yields the hexadecimal Unicode escape of a /// character, as `char`s. /// @@ -545,3 +412,124 @@ impl fmt::Display for EscapeDebug { fmt::Display::fmt(&self.0, f) } } + +/// Returns an iterator that yields the lowercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See +/// its documentation for more. +/// +/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase +/// [`char`]: ../../std/primitive.char.html +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug, Clone)] +pub struct ToLowercase(CaseMappingIter); + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for ToLowercase { + type Item = char; + fn next(&mut self) -> Option<char> { + self.0.next() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for ToLowercase {} + +/// Returns an iterator that yields the uppercase equivalent of a `char`. +/// +/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See +/// its documentation for more. +/// +/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase +/// [`char`]: ../../std/primitive.char.html +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug, Clone)] +pub struct ToUppercase(CaseMappingIter); + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for ToUppercase { + type Item = char; + fn next(&mut self) -> Option<char> { + self.0.next() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for ToUppercase {} + +#[derive(Debug, Clone)] +enum CaseMappingIter { + Three(char, char, char), + Two(char, char), + One(char), + Zero, +} + +impl CaseMappingIter { + fn new(chars: [char; 3]) -> CaseMappingIter { + if chars[2] == '\0' { + if chars[1] == '\0' { + CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0' + } else { + CaseMappingIter::Two(chars[0], chars[1]) + } + } else { + CaseMappingIter::Three(chars[0], chars[1], chars[2]) + } + } +} + +impl Iterator for CaseMappingIter { + type Item = char; + fn next(&mut self) -> Option<char> { + match *self { + CaseMappingIter::Three(a, b, c) => { + *self = CaseMappingIter::Two(b, c); + Some(a) + } + CaseMappingIter::Two(b, c) => { + *self = CaseMappingIter::One(c); + Some(b) + } + CaseMappingIter::One(c) => { + *self = CaseMappingIter::Zero; + Some(c) + } + CaseMappingIter::Zero => None, + } + } +} + +impl fmt::Display for CaseMappingIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + CaseMappingIter::Three(a, b, c) => { + f.write_char(a)?; + f.write_char(b)?; + f.write_char(c) + } + CaseMappingIter::Two(b, c) => { + f.write_char(b)?; + f.write_char(c) + } + CaseMappingIter::One(c) => { + f.write_char(c) + } + CaseMappingIter::Zero => Ok(()), + } + } +} + +#[stable(feature = "char_struct_display", since = "1.16.0")] +impl fmt::Display for ToLowercase { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +#[stable(feature = "char_struct_display", since = "1.16.0")] +impl fmt::Display for ToUppercase { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 0ea1aa12146..060c55286fe 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -16,7 +16,6 @@ pub(crate) mod tables; pub(crate) mod version; pub mod str; -pub(crate) mod char; // For use in liballoc, not re-exported in libstd. pub mod derived_property { |
