//! Utilities for the `char` primitive type. //! //! *[See also the `char` primitive type](primitive@char).* //! //! The `char` type represents a single character. More specifically, since //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode //! scalar value]', which is similar to, but not the same as, a '[Unicode code //! point]'. //! //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value //! [Unicode code point]: https://www.unicode.org/glossary/#code_point //! //! This module exists for technical reasons, the primary documentation for //! `char` is directly on [the `char` primitive type][char] itself. //! //! This module is the home of the iterator implementations for the iterators //! implemented on `char`, as well as some useful constants and conversion //! functions that convert various types to `char`. #![allow(non_snake_case)] #![stable(feature = "rust1", since = "1.0.0")] mod convert; mod decode; mod methods; // stable re-exports #[rustfmt::skip] #[stable(feature = "try_from", since = "1.34.0")] pub use self::convert::CharTryFromError; #[stable(feature = "char_from_str", since = "1.20.0")] pub use self::convert::ParseCharError; #[stable(feature = "decode_utf16", since = "1.9.0")] pub use self::decode::{DecodeUtf16, DecodeUtf16Error}; // perma-unstable re-exports #[rustfmt::skip] #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub use self::methods::encode_utf16_raw; // perma-unstable #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub use self::methods::encode_utf8_raw; // perma-unstable #[rustfmt::skip] use crate::ascii; pub(crate) use self::methods::EscapeDebugExtArgs; use crate::error::Error; use crate::escape; use crate::fmt::{self, Write}; use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::num::NonZero; // UTF-8 ranges and tags for encoding characters const 
TAG_CONT: u8 = 0b1000_0000; const TAG_TWO_B: u8 = 0b1100_0000; const TAG_THREE_B: u8 = 0b1110_0000; const TAG_FOUR_B: u8 = 0b1111_0000; const MAX_ONE_B: u32 = 0x80; const MAX_TWO_B: u32 = 0x800; const MAX_THREE_B: u32 = 0x10000; /* Lu Uppercase_Letter an uppercase letter Ll Lowercase_Letter a lowercase letter Lt Titlecase_Letter a digraphic character, with first part uppercase Lm Modifier_Letter a modifier letter Lo Other_Letter other letters, including syllables and ideographs Mn Nonspacing_Mark a nonspacing combining mark (zero advance width) Mc Spacing_Mark a spacing combining mark (positive advance width) Me Enclosing_Mark an enclosing combining mark Nd Decimal_Number a decimal digit Nl Letter_Number a letterlike numeric character No Other_Number a numeric character of other type Pc Connector_Punctuation a connecting punctuation mark, like a tie Pd Dash_Punctuation a dash or hyphen punctuation mark Ps Open_Punctuation an opening punctuation mark (of a pair) Pe Close_Punctuation a closing punctuation mark (of a pair) Pi Initial_Punctuation an initial quotation mark Pf Final_Punctuation a final quotation mark Po Other_Punctuation a punctuation mark of other type Sm Math_Symbol a symbol of primarily mathematical use Sc Currency_Symbol a currency sign Sk Modifier_Symbol a non-letterlike modifier symbol So Other_Symbol a symbol of other type Zs Space_Separator a space character (of various non-zero widths) Zl Line_Separator U+2028 LINE SEPARATOR only Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only Cc Control a C0 or C1 control code Cf Format a format control character Cs Surrogate a surrogate code point Co Private_Use a private-use character Cn Unassigned a reserved unassigned code point or a noncharacter */ /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead. 
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;

/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;

/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
///
/// `iter` can be anything that converts into an iterator of `u16` code units;
/// the call is forwarded unchanged to the implementation in the `decode`
/// submodule.
#[stable(feature = "decode_utf16", since = "1.9.0")]
#[inline]
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
    self::decode::decode_utf16(iter)
}

/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
///
/// Returns an `Option`; the conversion itself is delegated to the `convert`
/// submodule.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_u32(i: u32) -> Option<char> {
    self::convert::from_u32(i)
}

/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
/// instead.
#[stable(feature = "char_from_unchecked", since = "1.5.0")] #[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")] #[must_use] #[inline] pub const unsafe fn from_u32_unchecked(i: u32) -> char { // SAFETY: the safety contract must be upheld by the caller. unsafe { self::convert::from_u32_unchecked(i) } } /// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead. #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")] #[must_use] #[inline] pub const fn from_digit(num: u32, radix: u32) -> Option { self::convert::from_digit(num, radix) } /// Returns an iterator that yields the hexadecimal Unicode escape of a /// character, as `char`s. /// /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See /// its documentation for more. /// /// [`escape_unicode`]: char::escape_unicode #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] pub struct EscapeUnicode(escape::EscapeIterInner<10>); impl EscapeUnicode { #[inline] const fn new(c: char) -> Self { Self(escape::EscapeIterInner::unicode(c)) } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for EscapeUnicode { type Item = char; #[inline] fn next(&mut self) -> Option { self.0.next().map(char::from) } #[inline] fn size_hint(&self) -> (usize, Option) { let n = self.0.len(); (n, Some(n)) } #[inline] fn count(self) -> usize { self.0.len() } #[inline] fn last(mut self) -> Option { self.0.next_back().map(char::from) } #[inline] fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_by(n) } } #[stable(feature = "exact_size_escape", since = "1.11.0")] impl ExactSizeIterator for EscapeUnicode { #[inline] fn len(&self) -> usize { self.0.len() } } #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for EscapeUnicode {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeUnicode { fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.0.as_str()) } } /// An iterator that yields the literal escape code of a `char`. /// /// This `struct` is created by the [`escape_default`] method on [`char`]. See /// its documentation for more. /// /// [`escape_default`]: char::escape_default #[derive(Clone, Debug)] #[stable(feature = "rust1", since = "1.0.0")] pub struct EscapeDefault(escape::EscapeIterInner<10>); impl EscapeDefault { #[inline] const fn printable(c: ascii::Char) -> Self { Self(escape::EscapeIterInner::ascii(c.to_u8())) } #[inline] const fn backslash(c: ascii::Char) -> Self { Self(escape::EscapeIterInner::backslash(c)) } #[inline] const fn unicode(c: char) -> Self { Self(escape::EscapeIterInner::unicode(c)) } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for EscapeDefault { type Item = char; #[inline] fn next(&mut self) -> Option { self.0.next().map(char::from) } #[inline] fn size_hint(&self) -> (usize, Option) { let n = self.0.len(); (n, Some(n)) } #[inline] fn count(self) -> usize { self.0.len() } #[inline] fn last(mut self) -> Option { self.0.next_back().map(char::from) } #[inline] fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_by(n) } } #[stable(feature = "exact_size_escape", since = "1.11.0")] impl ExactSizeIterator for EscapeDefault { #[inline] fn len(&self) -> usize { self.0.len() } } #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for EscapeDefault {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for EscapeDefault { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.0.as_str()) } } /// An iterator that yields the literal escape code of a `char`. /// /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its /// documentation for more. 
/// /// [`escape_debug`]: char::escape_debug #[stable(feature = "char_escape_debug", since = "1.20.0")] #[derive(Clone, Debug)] pub struct EscapeDebug(EscapeDebugInner); #[derive(Clone, Debug)] // Note: It’s possible to manually encode the EscapeDebugInner inside of // EscapeIterInner (e.g. with alive=254..255 indicating that data[0..4] holds // a char) which would likely result in a more optimised code. For now we use // the option easier to implement. enum EscapeDebugInner { Bytes(escape::EscapeIterInner<10>), Char(char), } impl EscapeDebug { #[inline] const fn printable(chr: char) -> Self { Self(EscapeDebugInner::Char(chr)) } #[inline] const fn backslash(c: ascii::Char) -> Self { Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::backslash(c))) } #[inline] const fn unicode(c: char) -> Self { Self(EscapeDebugInner::Bytes(escape::EscapeIterInner::unicode(c))) } #[inline] fn clear(&mut self) { self.0 = EscapeDebugInner::Bytes(escape::EscapeIterInner::empty()); } } #[stable(feature = "char_escape_debug", since = "1.20.0")] impl Iterator for EscapeDebug { type Item = char; #[inline] fn next(&mut self) -> Option { match self.0 { EscapeDebugInner::Bytes(ref mut bytes) => bytes.next().map(char::from), EscapeDebugInner::Char(chr) => { self.clear(); Some(chr) } } } #[inline] fn size_hint(&self) -> (usize, Option) { let n = self.len(); (n, Some(n)) } #[inline] fn count(self) -> usize { self.len() } } #[stable(feature = "char_escape_debug", since = "1.20.0")] impl ExactSizeIterator for EscapeDebug { fn len(&self) -> usize { match &self.0 { EscapeDebugInner::Bytes(bytes) => bytes.len(), EscapeDebugInner::Char(_) => 1, } } } #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for EscapeDebug {} #[stable(feature = "char_escape_debug", since = "1.20.0")] impl fmt::Display for EscapeDebug { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.0 { EscapeDebugInner::Bytes(bytes) => f.write_str(bytes.as_str()), EscapeDebugInner::Char(chr) => 
f.write_char(*chr), } } } macro_rules! casemappingiter_impls { ($(#[$attr:meta])* $ITER_NAME:ident) => { $(#[$attr])* #[stable(feature = "rust1", since = "1.0.0")] #[derive(Debug, Clone)] pub struct $ITER_NAME(CaseMappingIter); #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for $ITER_NAME { type Item = char; fn next(&mut self) -> Option { self.0.next() } fn size_hint(&self) -> (usize, Option) { self.0.size_hint() } fn fold(self, init: Acc, fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { self.0.fold(init, fold) } fn count(self) -> usize { self.0.count() } fn last(self) -> Option { self.0.last() } fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_by(n) } unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item { // SAFETY: just forwarding requirements to caller unsafe { self.0.__iterator_get_unchecked(idx) } } } #[stable(feature = "case_mapping_double_ended", since = "1.59.0")] impl DoubleEndedIterator for $ITER_NAME { fn next_back(&mut self) -> Option { self.0.next_back() } fn rfold(self, init: Acc, rfold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { self.0.rfold(init, rfold) } fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_back_by(n) } } #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for $ITER_NAME {} #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] impl ExactSizeIterator for $ITER_NAME { fn len(&self) -> usize { self.0.len() } fn is_empty(&self) -> bool { self.0.is_empty() } } // SAFETY: forwards to inner `array::IntoIter` #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for $ITER_NAME {} // SAFETY: forwards to inner `array::IntoIter` #[doc(hidden)] #[unstable(feature = "std_internals", issue = "none")] unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME { const MAY_HAVE_SIDE_EFFECT: bool = false; } // SAFETY: this iter has no subtypes/supertypes #[doc(hidden)] #[unstable(feature = 
"std_internals", issue = "none")] unsafe impl TrustedRandomAccess for $ITER_NAME {} #[stable(feature = "char_struct_display", since = "1.16.0")] impl fmt::Display for $ITER_NAME { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, f) } } } } casemappingiter_impls! { /// Returns an iterator that yields the lowercase equivalent of a `char`. /// /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See /// its documentation for more. /// /// [`to_lowercase`]: char::to_lowercase ToLowercase } casemappingiter_impls! { /// Returns an iterator that yields the uppercase equivalent of a `char`. /// /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See /// its documentation for more. /// /// [`to_uppercase`]: char::to_uppercase ToUppercase } #[derive(Debug, Clone)] struct CaseMappingIter(core::array::IntoIter); impl CaseMappingIter { #[inline] fn new(chars: [char; 3]) -> CaseMappingIter { let mut iter = chars.into_iter(); if chars[2] == '\0' { iter.next_back(); if chars[1] == '\0' { iter.next_back(); // Deliberately don't check `chars[0]`, // as '\0' lowercases to itself } } CaseMappingIter(iter) } } impl Iterator for CaseMappingIter { type Item = char; fn next(&mut self) -> Option { self.0.next() } fn size_hint(&self) -> (usize, Option) { self.0.size_hint() } fn fold(self, init: Acc, fold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, { self.0.fold(init, fold) } fn count(self) -> usize { self.0.count() } fn last(self) -> Option { self.0.last() } fn advance_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_by(n) } unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item { // SAFETY: just forwarding requirements to caller unsafe { self.0.__iterator_get_unchecked(idx) } } } impl DoubleEndedIterator for CaseMappingIter { fn next_back(&mut self) -> Option { self.0.next_back() } fn rfold(self, init: Acc, rfold: Fold) -> Acc where Fold: FnMut(Acc, Self::Item) -> Acc, 
{ self.0.rfold(init, rfold) } fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero> { self.0.advance_back_by(n) } } impl ExactSizeIterator for CaseMappingIter { fn len(&self) -> usize { self.0.len() } fn is_empty(&self) -> bool { self.0.is_empty() } } impl FusedIterator for CaseMappingIter {} // SAFETY: forwards to inner `array::IntoIter` unsafe impl TrustedLen for CaseMappingIter {} // SAFETY: forwards to inner `array::IntoIter` unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter { const MAY_HAVE_SIDE_EFFECT: bool = false; } // SAFETY: `CaseMappingIter` has no subtypes/supertypes unsafe impl TrustedRandomAccess for CaseMappingIter {} impl fmt::Display for CaseMappingIter { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for c in self.0.clone() { f.write_char(c)?; } Ok(()) } } /// The error type returned when a checked char conversion fails. #[stable(feature = "u8_from_char", since = "1.59.0")] #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct TryFromCharError(pub(crate) ()); #[stable(feature = "u8_from_char", since = "1.59.0")] impl fmt::Display for TryFromCharError { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { "unicode code point out of range".fmt(fmt) } } #[stable(feature = "u8_from_char", since = "1.59.0")] impl Error for TryFromCharError {}