diff options
Diffstat (limited to 'src/libstd_unicode/char.rs')
| -rw-r--r-- | src/libstd_unicode/char.rs | 530 |
1 files changed, 528 insertions, 2 deletions
diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs index 5c0c7a4fbca..1c0cdfd8435 100644 --- a/src/libstd_unicode/char.rs +++ b/src/libstd_unicode/char.rs @@ -57,6 +57,7 @@ pub use tables::{UnicodeVersion, UNICODE_VERSION}; /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase /// [`char`]: ../../std/primitive.char.html #[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] pub struct ToLowercase(CaseMappingIter); #[stable(feature = "rust1", since = "1.0.0")] @@ -78,6 +79,7 @@ impl FusedIterator for ToLowercase {} /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase /// [`char`]: ../../std/primitive.char.html #[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] pub struct ToUppercase(CaseMappingIter); #[stable(feature = "rust1", since = "1.0.0")] @@ -91,6 +93,7 @@ impl Iterator for ToUppercase { #[unstable(feature = "fused", issue = "35602")] impl FusedIterator for ToUppercase {} +#[derive(Debug)] enum CaseMappingIter { Three(char, char, char), Two(char, char), @@ -923,11 +926,534 @@ impl char { pub fn to_uppercase(self) -> ToUppercase { ToUppercase(CaseMappingIter::new(conversions::to_upper(self))) } + + /// Checks if the value is within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// let ascii = 'a'; + /// let non_ascii = '❤'; + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn is_ascii(&self) -> bool { + *self as u32 <= 0x7F + } + + /// Makes a copy of the value in its ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. 
+ /// + /// # Examples + /// + /// ``` + /// let ascii = 'a'; + /// let non_ascii = '❤'; + /// + /// assert_eq!('A', ascii.to_ascii_uppercase()); + /// assert_eq!('❤', non_ascii.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> char { + if self.is_ascii() { + (*self as u8).to_ascii_uppercase() as char + } else { + *self + } + } + + /// Makes a copy of the value in its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let ascii = 'A'; + /// let non_ascii = '❤'; + /// + /// assert_eq!('a', ascii.to_ascii_lowercase()); + /// assert_eq!('❤', non_ascii.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> char { + if self.is_ascii() { + (*self as u8).to_ascii_lowercase() as char + } else { + *self + } + } + + /// Checks that two values are an ASCII case-insensitive match. + /// + /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`. 
+ /// + /// # Examples + /// + /// ``` + /// let upper_a = 'A'; + /// let lower_a = 'a'; + /// let lower_z = 'z'; + /// + /// assert!(upper_a.eq_ignore_ascii_case(&lower_a)); + /// assert!(upper_a.eq_ignore_ascii_case(&upper_a)); + /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z)); + /// ``` + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &char) -> bool { + self.to_ascii_lowercase() == other.to_ascii_lowercase() + } + + /// Converts this type to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`to_ascii_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// let mut ascii = 'a'; + /// + /// ascii.make_ascii_uppercase(); + /// + /// assert_eq!('A', ascii); + /// ``` + /// + /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + *self = self.to_ascii_uppercase(); + } + + /// Converts this type to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`to_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let mut ascii = 'A'; + /// + /// ascii.make_ascii_lowercase(); + /// + /// assert_eq!('a', ascii); + /// ``` + /// + /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + *self = self.to_ascii_lowercase(); + } + + /// Checks if the value is an ASCII alphabetic character: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_alphabetic()); + /// assert!(uppercase_g.is_ascii_alphabetic()); + /// assert!(a.is_ascii_alphabetic()); + /// assert!(g.is_ascii_alphabetic()); + /// assert!(!zero.is_ascii_alphabetic()); + /// assert!(!percent.is_ascii_alphabetic()); + /// assert!(!space.is_ascii_alphabetic()); + /// assert!(!lf.is_ascii_alphabetic()); + /// assert!(!esc.is_ascii_alphabetic()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_alphabetic(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_alphabetic() + } + + /// Checks if the value is an ASCII uppercase character: + /// U+0041 'A' ... U+005A 'Z'. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_uppercase()); + /// assert!(uppercase_g.is_ascii_uppercase()); + /// assert!(!a.is_ascii_uppercase()); + /// assert!(!g.is_ascii_uppercase()); + /// assert!(!zero.is_ascii_uppercase()); + /// assert!(!percent.is_ascii_uppercase()); + /// assert!(!space.is_ascii_uppercase()); + /// assert!(!lf.is_ascii_uppercase()); + /// assert!(!esc.is_ascii_uppercase()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_uppercase(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_uppercase() + } + + /// Checks if the value is an ASCII lowercase character: + /// U+0061 'a' ... U+007A 'z'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_lowercase()); + /// assert!(!uppercase_g.is_ascii_lowercase()); + /// assert!(a.is_ascii_lowercase()); + /// assert!(g.is_ascii_lowercase()); + /// assert!(!zero.is_ascii_lowercase()); + /// assert!(!percent.is_ascii_lowercase()); + /// assert!(!space.is_ascii_lowercase()); + /// assert!(!lf.is_ascii_lowercase()); + /// assert!(!esc.is_ascii_lowercase()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_lowercase(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_lowercase() + } + + /// Checks if the value is an ASCII alphanumeric character: + /// + /// - U+0041 'A' ... U+005A 'Z', or + /// - U+0061 'a' ... U+007A 'z', or + /// - U+0030 '0' ... U+0039 '9'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_alphanumeric()); + /// assert!(uppercase_g.is_ascii_alphanumeric()); + /// assert!(a.is_ascii_alphanumeric()); + /// assert!(g.is_ascii_alphanumeric()); + /// assert!(zero.is_ascii_alphanumeric()); + /// assert!(!percent.is_ascii_alphanumeric()); + /// assert!(!space.is_ascii_alphanumeric()); + /// assert!(!lf.is_ascii_alphanumeric()); + /// assert!(!esc.is_ascii_alphanumeric()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_alphanumeric(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_alphanumeric() + } + + /// Checks if the value is an ASCII decimal digit: + /// U+0030 '0' ... U+0039 '9'. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_digit()); + /// assert!(!uppercase_g.is_ascii_digit()); + /// assert!(!a.is_ascii_digit()); + /// assert!(!g.is_ascii_digit()); + /// assert!(zero.is_ascii_digit()); + /// assert!(!percent.is_ascii_digit()); + /// assert!(!space.is_ascii_digit()); + /// assert!(!lf.is_ascii_digit()); + /// assert!(!esc.is_ascii_digit()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_digit(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_digit() + } + + /// Checks if the value is an ASCII hexadecimal digit: + /// + /// - U+0030 '0' ... U+0039 '9', or + /// - U+0041 'A' ... 
U+0046 'F', or + /// - U+0061 'a' ... U+0066 'f'. + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_hexdigit()); + /// assert!(!uppercase_g.is_ascii_hexdigit()); + /// assert!(a.is_ascii_hexdigit()); + /// assert!(!g.is_ascii_hexdigit()); + /// assert!(zero.is_ascii_hexdigit()); + /// assert!(!percent.is_ascii_hexdigit()); + /// assert!(!space.is_ascii_hexdigit()); + /// assert!(!lf.is_ascii_hexdigit()); + /// assert!(!esc.is_ascii_hexdigit()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_hexdigit(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_hexdigit() + } + + /// Checks if the value is an ASCII punctuation character: + /// + /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or + /// - U+003A ... U+0040 `: ; < = > ? @`, or + /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or + /// - U+007B ... 
U+007E `{ | } ~` + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_punctuation()); + /// assert!(!uppercase_g.is_ascii_punctuation()); + /// assert!(!a.is_ascii_punctuation()); + /// assert!(!g.is_ascii_punctuation()); + /// assert!(!zero.is_ascii_punctuation()); + /// assert!(percent.is_ascii_punctuation()); + /// assert!(!space.is_ascii_punctuation()); + /// assert!(!lf.is_ascii_punctuation()); + /// assert!(!esc.is_ascii_punctuation()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_punctuation(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_punctuation() + } + + /// Checks if the value is an ASCII graphic character: + /// U+0021 '!' ... U+007E '~'. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(uppercase_a.is_ascii_graphic()); + /// assert!(uppercase_g.is_ascii_graphic()); + /// assert!(a.is_ascii_graphic()); + /// assert!(g.is_ascii_graphic()); + /// assert!(zero.is_ascii_graphic()); + /// assert!(percent.is_ascii_graphic()); + /// assert!(!space.is_ascii_graphic()); + /// assert!(!lf.is_ascii_graphic()); + /// assert!(!esc.is_ascii_graphic()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_graphic(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_graphic() + } + + /// Checks if the value is an ASCII whitespace character: + /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED, + /// U+000C FORM FEED, or U+000D CARRIAGE RETURN. + /// + /// Rust uses the WhatWG Infra Standard's [definition of ASCII + /// whitespace][infra-aw]. There are several other definitions in + /// wide use. For instance, [the POSIX locale][pct] includes + /// U+000B VERTICAL TAB as well as all the above characters, + /// but—from the very same specification—[the default rule for + /// "field splitting" in the Bourne shell][bfs] considers *only* + /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace. + /// + /// If you are writing a program that will process an existing + /// file format, check what that format's definition of whitespace is + /// before using this function. 
+ /// + /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace + /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01 + /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 + /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_whitespace()); + /// assert!(!uppercase_g.is_ascii_whitespace()); + /// assert!(!a.is_ascii_whitespace()); + /// assert!(!g.is_ascii_whitespace()); + /// assert!(!zero.is_ascii_whitespace()); + /// assert!(!percent.is_ascii_whitespace()); + /// assert!(space.is_ascii_whitespace()); + /// assert!(lf.is_ascii_whitespace()); + /// assert!(!esc.is_ascii_whitespace()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_whitespace(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_whitespace() + } + + /// Checks if the value is an ASCII control character: + /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE. + /// Note that most ASCII whitespace characters are control + /// characters, but SPACE is not. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(ascii_ctype)] + /// + /// let uppercase_a = 'A'; + /// let uppercase_g = 'G'; + /// let a = 'a'; + /// let g = 'g'; + /// let zero = '0'; + /// let percent = '%'; + /// let space = ' '; + /// let lf = '\n'; + /// let esc: char = 0x1b_u8.into(); + /// + /// assert!(!uppercase_a.is_ascii_control()); + /// assert!(!uppercase_g.is_ascii_control()); + /// assert!(!a.is_ascii_control()); + /// assert!(!g.is_ascii_control()); + /// assert!(!zero.is_ascii_control()); + /// assert!(!percent.is_ascii_control()); + /// assert!(!space.is_ascii_control()); + /// assert!(lf.is_ascii_control()); + /// assert!(esc.is_ascii_control()); + /// ``` + #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")] + #[inline] + pub fn is_ascii_control(&self) -> bool { + self.is_ascii() && (*self as u8).is_ascii_control() + } } /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. #[stable(feature = "decode_utf16", since = "1.9.0")] -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct DecodeUtf16<I> where I: Iterator<Item = u16> { @@ -935,7 +1461,7 @@ pub struct DecodeUtf16<I> buf: Option<u16>, } -/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s. +/// An error that can be returned when decoding UTF-16 code points. #[stable(feature = "decode_utf16", since = "1.9.0")] #[derive(Debug, Clone, Eq, PartialEq)] pub struct DecodeUtf16Error { |
