about summary refs log tree commit diff
path: root/src/libstd_unicode/char.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libstd_unicode/char.rs')
-rw-r--r--src/libstd_unicode/char.rs530
1 files changed, 528 insertions, 2 deletions
diff --git a/src/libstd_unicode/char.rs b/src/libstd_unicode/char.rs
index 5c0c7a4fbca..1c0cdfd8435 100644
--- a/src/libstd_unicode/char.rs
+++ b/src/libstd_unicode/char.rs
@@ -57,6 +57,7 @@ pub use tables::{UnicodeVersion, UNICODE_VERSION};
 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
 /// [`char`]: ../../std/primitive.char.html
 #[stable(feature = "rust1", since = "1.0.0")]
+#[derive(Debug)]
 pub struct ToLowercase(CaseMappingIter);
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -78,6 +79,7 @@ impl FusedIterator for ToLowercase {}
 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
 /// [`char`]: ../../std/primitive.char.html
 #[stable(feature = "rust1", since = "1.0.0")]
+#[derive(Debug)]
 pub struct ToUppercase(CaseMappingIter);
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -91,6 +93,7 @@ impl Iterator for ToUppercase {
 #[unstable(feature = "fused", issue = "35602")]
 impl FusedIterator for ToUppercase {}
 
+#[derive(Debug)]
 enum CaseMappingIter {
     Three(char, char, char),
     Two(char, char),
@@ -923,11 +926,534 @@ impl char {
     pub fn to_uppercase(self) -> ToUppercase {
         ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
     }
+
+    /// Checks if the value is within the ASCII range.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let ascii = 'a';
+    /// let non_ascii = '❤';
+    ///
+    /// assert!(ascii.is_ascii());
+    /// assert!(!non_ascii.is_ascii());
+    /// ```
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn is_ascii(&self) -> bool {
+        *self as u32 <= 0x7F
+    }
+
+    /// Makes a copy of the value in its ASCII upper case equivalent.
+    ///
+    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To uppercase the value in-place, use [`make_ascii_uppercase`].
+    ///
+    /// To uppercase ASCII characters in addition to non-ASCII characters, use
+    /// [`to_uppercase`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let ascii = 'a';
+    /// let non_ascii = '❤';
+    ///
+    /// assert_eq!('A', ascii.to_ascii_uppercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
+    /// ```
+    ///
+    /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
+    /// [`to_uppercase`]: #method.to_uppercase
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn to_ascii_uppercase(&self) -> char {
+        if self.is_ascii() {
+            (*self as u8).to_ascii_uppercase() as char
+        } else {
+            *self
+        }
+    }
+
+    /// Makes a copy of the value in its ASCII lower case equivalent.
+    ///
+    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To lowercase the value in-place, use [`make_ascii_lowercase`].
+    ///
+    /// To lowercase ASCII characters in addition to non-ASCII characters, use
+    /// [`to_lowercase`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let ascii = 'A';
+    /// let non_ascii = '❤';
+    ///
+    /// assert_eq!('a', ascii.to_ascii_lowercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
+    /// ```
+    ///
+    /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
+    /// [`to_lowercase`]: #method.to_lowercase
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn to_ascii_lowercase(&self) -> char {
+        if self.is_ascii() {
+            (*self as u8).to_ascii_lowercase() as char
+        } else {
+            *self
+        }
+    }
+
+    /// Checks that two values are an ASCII case-insensitive match.
+    ///
+    /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let upper_a = 'A';
+    /// let lower_a = 'a';
+    /// let lower_z = 'z';
+    ///
+    /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
+    /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
+    /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
+    /// ```
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn eq_ignore_ascii_case(&self, other: &char) -> bool {
+        self.to_ascii_lowercase() == other.to_ascii_lowercase()
+    }
+
+    /// Converts this type to its ASCII upper case equivalent in-place.
+    ///
+    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To return a new uppercased value without modifying the existing one, use
+    /// [`to_ascii_uppercase`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut ascii = 'a';
+    ///
+    /// ascii.make_ascii_uppercase();
+    ///
+    /// assert_eq!('A', ascii);
+    /// ```
+    ///
+    /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn make_ascii_uppercase(&mut self) {
+        *self = self.to_ascii_uppercase();
+    }
+
+    /// Converts this type to its ASCII lower case equivalent in-place.
+    ///
+    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+    /// but non-ASCII letters are unchanged.
+    ///
+    /// To return a new lowercased value without modifying the existing one, use
+    /// [`to_ascii_lowercase`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut ascii = 'A';
+    ///
+    /// ascii.make_ascii_lowercase();
+    ///
+    /// assert_eq!('a', ascii);
+    /// ```
+    ///
+    /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
+    #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
+    #[inline]
+    pub fn make_ascii_lowercase(&mut self) {
+        *self = self.to_ascii_lowercase();
+    }
+
+    /// Checks if the value is an ASCII alphabetic character:
+    ///
+    /// - U+0041 'A' ... U+005A 'Z', or
+    /// - U+0061 'a' ... U+007A 'z'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(uppercase_a.is_ascii_alphabetic());
+    /// assert!(uppercase_g.is_ascii_alphabetic());
+    /// assert!(a.is_ascii_alphabetic());
+    /// assert!(g.is_ascii_alphabetic());
+    /// assert!(!zero.is_ascii_alphabetic());
+    /// assert!(!percent.is_ascii_alphabetic());
+    /// assert!(!space.is_ascii_alphabetic());
+    /// assert!(!lf.is_ascii_alphabetic());
+    /// assert!(!esc.is_ascii_alphabetic());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_alphabetic(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_alphabetic()
+    }
+
+    /// Checks if the value is an ASCII uppercase character:
+    /// U+0041 'A' ... U+005A 'Z'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(uppercase_a.is_ascii_uppercase());
+    /// assert!(uppercase_g.is_ascii_uppercase());
+    /// assert!(!a.is_ascii_uppercase());
+    /// assert!(!g.is_ascii_uppercase());
+    /// assert!(!zero.is_ascii_uppercase());
+    /// assert!(!percent.is_ascii_uppercase());
+    /// assert!(!space.is_ascii_uppercase());
+    /// assert!(!lf.is_ascii_uppercase());
+    /// assert!(!esc.is_ascii_uppercase());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_uppercase(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_uppercase()
+    }
+
+    /// Checks if the value is an ASCII lowercase character:
+    /// U+0061 'a' ... U+007A 'z'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(!uppercase_a.is_ascii_lowercase());
+    /// assert!(!uppercase_g.is_ascii_lowercase());
+    /// assert!(a.is_ascii_lowercase());
+    /// assert!(g.is_ascii_lowercase());
+    /// assert!(!zero.is_ascii_lowercase());
+    /// assert!(!percent.is_ascii_lowercase());
+    /// assert!(!space.is_ascii_lowercase());
+    /// assert!(!lf.is_ascii_lowercase());
+    /// assert!(!esc.is_ascii_lowercase());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_lowercase(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_lowercase()
+    }
+
+    /// Checks if the value is an ASCII alphanumeric character:
+    ///
+    /// - U+0041 'A' ... U+005A 'Z', or
+    /// - U+0061 'a' ... U+007A 'z', or
+    /// - U+0030 '0' ... U+0039 '9'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(uppercase_a.is_ascii_alphanumeric());
+    /// assert!(uppercase_g.is_ascii_alphanumeric());
+    /// assert!(a.is_ascii_alphanumeric());
+    /// assert!(g.is_ascii_alphanumeric());
+    /// assert!(zero.is_ascii_alphanumeric());
+    /// assert!(!percent.is_ascii_alphanumeric());
+    /// assert!(!space.is_ascii_alphanumeric());
+    /// assert!(!lf.is_ascii_alphanumeric());
+    /// assert!(!esc.is_ascii_alphanumeric());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_alphanumeric(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
+    }
+
+    /// Checks if the value is an ASCII decimal digit:
+    /// U+0030 '0' ... U+0039 '9'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(!uppercase_a.is_ascii_digit());
+    /// assert!(!uppercase_g.is_ascii_digit());
+    /// assert!(!a.is_ascii_digit());
+    /// assert!(!g.is_ascii_digit());
+    /// assert!(zero.is_ascii_digit());
+    /// assert!(!percent.is_ascii_digit());
+    /// assert!(!space.is_ascii_digit());
+    /// assert!(!lf.is_ascii_digit());
+    /// assert!(!esc.is_ascii_digit());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_digit(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_digit()
+    }
+
+    /// Checks if the value is an ASCII hexadecimal digit:
+    ///
+    /// - U+0030 '0' ... U+0039 '9', or
+    /// - U+0041 'A' ... U+0046 'F', or
+    /// - U+0061 'a' ... U+0066 'f'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(uppercase_a.is_ascii_hexdigit());
+    /// assert!(!uppercase_g.is_ascii_hexdigit());
+    /// assert!(a.is_ascii_hexdigit());
+    /// assert!(!g.is_ascii_hexdigit());
+    /// assert!(zero.is_ascii_hexdigit());
+    /// assert!(!percent.is_ascii_hexdigit());
+    /// assert!(!space.is_ascii_hexdigit());
+    /// assert!(!lf.is_ascii_hexdigit());
+    /// assert!(!esc.is_ascii_hexdigit());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_hexdigit(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_hexdigit()
+    }
+
+    /// Checks if the value is an ASCII punctuation character:
+    ///
+    /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
+    /// - U+003A ... U+0040 `: ; < = > ? @`, or
+    /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
+    /// - U+007B ... U+007E `{ | } ~`
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(!uppercase_a.is_ascii_punctuation());
+    /// assert!(!uppercase_g.is_ascii_punctuation());
+    /// assert!(!a.is_ascii_punctuation());
+    /// assert!(!g.is_ascii_punctuation());
+    /// assert!(!zero.is_ascii_punctuation());
+    /// assert!(percent.is_ascii_punctuation());
+    /// assert!(!space.is_ascii_punctuation());
+    /// assert!(!lf.is_ascii_punctuation());
+    /// assert!(!esc.is_ascii_punctuation());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_punctuation(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_punctuation()
+    }
+
+    /// Checks if the value is an ASCII graphic character:
+    /// U+0021 '@' ... U+007E '~'.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(uppercase_a.is_ascii_graphic());
+    /// assert!(uppercase_g.is_ascii_graphic());
+    /// assert!(a.is_ascii_graphic());
+    /// assert!(g.is_ascii_graphic());
+    /// assert!(zero.is_ascii_graphic());
+    /// assert!(percent.is_ascii_graphic());
+    /// assert!(!space.is_ascii_graphic());
+    /// assert!(!lf.is_ascii_graphic());
+    /// assert!(!esc.is_ascii_graphic());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_graphic(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_graphic()
+    }
+
+    /// Checks if the value is an ASCII whitespace character:
+    /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
+    /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
+    ///
+    /// Rust uses the WhatWG Infra Standard's [definition of ASCII
+    /// whitespace][infra-aw]. There are several other definitions in
+    /// wide use. For instance, [the POSIX locale][pct] includes
+    /// U+000B VERTICAL TAB as well as all the above characters,
+    /// but—from the very same specification—[the default rule for
+    /// "field splitting" in the Bourne shell][bfs] considers *only*
+    /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
+    ///
+    /// If you are writing a program that will process an existing
+    /// file format, check what that format's definition of whitespace is
+    /// before using this function.
+    ///
+    /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
+    /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
+    /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(!uppercase_a.is_ascii_whitespace());
+    /// assert!(!uppercase_g.is_ascii_whitespace());
+    /// assert!(!a.is_ascii_whitespace());
+    /// assert!(!g.is_ascii_whitespace());
+    /// assert!(!zero.is_ascii_whitespace());
+    /// assert!(!percent.is_ascii_whitespace());
+    /// assert!(space.is_ascii_whitespace());
+    /// assert!(lf.is_ascii_whitespace());
+    /// assert!(!esc.is_ascii_whitespace());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_whitespace(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_whitespace()
+    }
+
+    /// Checks if the value is an ASCII control character:
+    /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
+    /// Note that most ASCII whitespace characters are control
+    /// characters, but SPACE is not.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(ascii_ctype)]
+    ///
+    /// let uppercase_a = 'A';
+    /// let uppercase_g = 'G';
+    /// let a = 'a';
+    /// let g = 'g';
+    /// let zero = '0';
+    /// let percent = '%';
+    /// let space = ' ';
+    /// let lf = '\n';
+    /// let esc: char = 0x1b_u8.into();
+    ///
+    /// assert!(!uppercase_a.is_ascii_control());
+    /// assert!(!uppercase_g.is_ascii_control());
+    /// assert!(!a.is_ascii_control());
+    /// assert!(!g.is_ascii_control());
+    /// assert!(!zero.is_ascii_control());
+    /// assert!(!percent.is_ascii_control());
+    /// assert!(!space.is_ascii_control());
+    /// assert!(lf.is_ascii_control());
+    /// assert!(esc.is_ascii_control());
+    /// ```
+    #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
+    #[inline]
+    pub fn is_ascii_control(&self) -> bool {
+        self.is_ascii() && (*self as u8).is_ascii_control()
+    }
 }
 
 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
 #[stable(feature = "decode_utf16", since = "1.9.0")]
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct DecodeUtf16<I>
     where I: Iterator<Item = u16>
 {
@@ -935,7 +1461,7 @@ pub struct DecodeUtf16<I>
     buf: Option<u16>,
 }
 
-/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
+/// An error that can be returned when decoding UTF-16 code points.
 #[stable(feature = "decode_utf16", since = "1.9.0")]
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct DecodeUtf16Error {