about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorEduardo Sánchez Muñoz <esm@eduardosm.net>2020-05-03 18:12:00 +0200
committerEduardo Sánchez Muñoz <esm@eduardosm.net>2020-05-03 18:12:53 +0200
commit68f89fcbf982d4b2a40d3175568bef194ee4f3b7 (patch)
tree61d3dee2a81139f0cbf4a4c76b4dd068655a6c80 /src
parente5f35df2c6944b843b08369c4b2ff3bdb0beb2d2 (diff)
downloadrust-68f89fcbf982d4b2a40d3175568bef194ee4f3b7.tar.gz
rust-68f89fcbf982d4b2a40d3175568bef194ee4f3b7.zip
Make `std::char` functions and constants associated to `char`.
Diffstat (limited to 'src')
-rw-r--r--src/libcore/char/methods.rs240
-rw-r--r--src/libcore/char/mod.rs4
2 files changed, 242 insertions, 2 deletions
diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs
index 302400744e2..e0e70216918 100644
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@@ -9,6 +9,246 @@ use super::*;
 
 #[lang = "char"]
 impl char {
+    /// The highest valid code point a `char` can have.
+    ///
+    /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
+    /// Point], but only ones within a certain range. `MAX` is the highest valid
+    /// code point that's a valid [Unicode Scalar Value].
+    ///
+    /// [`char`]: ../../std/primitive.char.html
+    /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
+    /// [Code Point]: http://www.unicode.org/glossary/#code_point
+    #[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
+    pub const MAX: char = '\u{10ffff}';
+
+    /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
+    /// decoding error.
+    ///
+    /// It can occur, for example, when giving ill-formed UTF-8 bytes to
+    /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
+    #[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
+    pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
+
+    /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+    /// `char` and `str` methods are based on.
+    ///
+    /// New versions of Unicode are released regularly and subsequently all methods
+    /// in the standard library depending on Unicode are updated. Therefore the
+    /// behavior of some `char` and `str` methods and the value of this constant
+    /// changes over time. This is *not* considered to be a breaking change.
+    ///
+    /// The version numbering scheme is explained in
+    /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
+    #[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
+    pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
+
+    /// Creates an iterator over the UTF-16 encoded code points in `iter`,
+    /// returning unpaired surrogates as `Err`s.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use std::char::decode_utf16;
+    ///
+    /// // 𝄞mus<invalid>ic<invalid>
+    /// let v = [
+    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
+    /// ];
+    ///
+    /// assert_eq!(
+    ///     decode_utf16(v.iter().cloned())
+    ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
+    ///         .collect::<Vec<_>>(),
+    ///     vec![
+    ///         Ok('𝄞'),
+    ///         Ok('m'), Ok('u'), Ok('s'),
+    ///         Err(0xDD1E),
+    ///         Ok('i'), Ok('c'),
+    ///         Err(0xD834)
+    ///     ]
+    /// );
+    /// ```
+    ///
+    /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
+    ///
+    /// ```
+    /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
+    ///
+    /// // 𝄞mus<invalid>ic<invalid>
+    /// let v = [
+    ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
+    /// ];
+    ///
+    /// assert_eq!(
+    ///     decode_utf16(v.iter().cloned())
+    ///        .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
+    ///        .collect::<String>(),
+    ///     "𝄞mus�ic�"
+    /// );
+    /// ```
+    #[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
+    #[inline]
+    pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
+        super::decode::decode_utf16(iter)
+    }
+
+    /// Converts a `u32` to a `char`.
+    ///
+    /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
+    /// `as`:
+    ///
+    /// ```
+    /// let c = '💯';
+    /// let i = c as u32;
+    ///
+    /// assert_eq!(128175, i);
+    /// ```
+    ///
+    /// However, the reverse is not true: not all valid [`u32`]s are valid
+    /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
+    /// for a [`char`].
+    ///
+    /// [`char`]: ../../std/primitive.char.html
+    /// [`u32`]: ../../std/primitive.u32.html
+    ///
+    /// For an unsafe version of this function which ignores these checks, see
+    /// [`from_u32_unchecked`].
+    ///
+    /// [`from_u32_unchecked`]: fn.from_u32_unchecked.html
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use std::char;
+    ///
+    /// let c = char::from_u32(0x2764);
+    ///
+    /// assert_eq!(Some('❤'), c);
+    /// ```
+    ///
+    /// Returning `None` when the input is not a valid [`char`]:
+    ///
+    /// ```
+    /// use std::char;
+    ///
+    /// let c = char::from_u32(0x110000);
+    ///
+    /// assert_eq!(None, c);
+    /// ```
+    #[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
+    #[inline]
+    pub fn from_u32(i: u32) -> Option<char> {
+        super::convert::from_u32(i)
+    }
+
+    /// Converts a `u32` to a `char`, ignoring validity.
+    ///
+    /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
+    /// `as`:
+    ///
+    /// ```
+    /// let c = '💯';
+    /// let i = c as u32;
+    ///
+    /// assert_eq!(128175, i);
+    /// ```
+    ///
+    /// However, the reverse is not true: not all valid [`u32`]s are valid
+    /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
+    /// [`char`], possibly creating an invalid one.
+    ///
+    /// [`char`]: ../../std/primitive.char.html
+    /// [`u32`]: ../../std/primitive.u32.html
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe, as it may construct invalid `char` values.
+    ///
+    /// For a safe version of this function, see the [`from_u32`] function.
+    ///
+    /// [`from_u32`]: fn.from_u32.html
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use std::char;
+    ///
+    /// let c = unsafe { char::from_u32_unchecked(0x2764) };
+    ///
+    /// assert_eq!('❤', c);
+    /// ```
+    #[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
+    #[inline]
+    pub unsafe fn from_u32_unchecked(i: u32) -> char {
+        super::convert::from_u32_unchecked(i)
+    }
+
+    /// Converts a digit in the given radix to a `char`.
+    ///
+    /// A 'radix' here is sometimes also called a 'base'. A radix of two
+    /// indicates a binary number, a radix of ten, decimal, and a radix of
+    /// sixteen, hexadecimal, to give some common values. Arbitrary
+    /// radices are supported.
+    ///
+    /// `from_digit()` will return `None` if the input is not a digit in
+    /// the given radix.
+    ///
+    /// # Panics
+    ///
+    /// Panics if given a radix larger than 36.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use std::char;
+    ///
+    /// let c = char::from_digit(4, 10);
+    ///
+    /// assert_eq!(Some('4'), c);
+    ///
+    /// // Decimal 11 is a single digit in base 16
+    /// let c = char::from_digit(11, 16);
+    ///
+    /// assert_eq!(Some('b'), c);
+    /// ```
+    ///
+    /// Returning `None` when the input is not a digit:
+    ///
+    /// ```
+    /// use std::char;
+    ///
+    /// let c = char::from_digit(20, 10);
+    ///
+    /// assert_eq!(None, c);
+    /// ```
+    ///
+    /// Passing a large radix, causing a panic:
+    ///
+    /// ```
+    /// use std::thread;
+    /// use std::char;
+    ///
+    /// let result = thread::spawn(|| {
+    ///     // this panics
+    ///     let c = char::from_digit(1, 37);
+    /// }).join();
+    ///
+    /// assert!(result.is_err());
+    /// ```
+    #[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
+    #[inline]
+    pub fn from_digit(num: u32, radix: u32) -> Option<char> {
+        super::convert::from_digit(num, radix)
+    }
+
     /// Checks if a `char` is a digit in the given radix.
     ///
     /// A 'radix' here is sometimes also called a 'base'. A radix of two
diff --git a/src/libcore/char/mod.rs b/src/libcore/char/mod.rs
index d82a482e0f1..bf65c31e135 100644
--- a/src/libcore/char/mod.rs
+++ b/src/libcore/char/mod.rs
@@ -92,7 +92,7 @@ const MAX_THREE_B: u32 = 0x10000;
 /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
 /// [Code Point]: http://www.unicode.org/glossary/#code_point
 #[stable(feature = "rust1", since = "1.0.0")]
-pub const MAX: char = '\u{10ffff}';
+pub const MAX: char = char::MAX;
 
 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
 /// decoding error.
@@ -100,7 +100,7 @@ pub const MAX: char = '\u{10ffff}';
 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
 #[stable(feature = "decode_utf16", since = "1.9.0")]
-pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
+pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
 
 /// Returns an iterator that yields the hexadecimal Unicode escape of a
 /// character, as `char`s.