//! Utilities for the `char` primitive type.
//!
//! *[See also the `char` primitive type](primitive@char).*
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.

#![allow(non_snake_case)]
#![stable(feature = "rust1", since = "1.0.0")]

mod convert;
mod decode;
mod methods;

// stable re-exports
#[rustfmt::skip]
#[stable(feature = "try_from", since = "1.34.0")]
pub use self::convert::CharTryFromError;
#[stable(feature = "char_from_str", since = "1.20.0")]
pub use self::convert::ParseCharError;
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub use self::decode::{DecodeUtf16, DecodeUtf16Error};

// perma-unstable re-exports
#[rustfmt::skip]
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf16_raw; // perma-unstable
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf8_raw; // perma-unstable

#[rustfmt::skip]
use crate::ascii;
pub(crate) use self::methods::EscapeDebugExtArgs;
use crate::error::Error;
use crate::escape;
use crate::fmt::{self, Write};
use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
use crate::num::NonZero;

// UTF-8 ranges and tags for encoding characters
//
// The `TAG_*` values are the fixed high bits of a UTF-8 byte, and the `MAX_*`
// values are exclusive upper bounds on the code points that fit in a sequence
// of the named length. NOTE(review): nothing in this file reads them;
// presumably the encoding code in a submodule does — confirm before removing.

// High bits of a continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
// High bits of the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
// High bits of the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
// High bits of the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
// Code points below this value fit in one byte.
const MAX_ONE_B: u32 = 0x80;
// Code points below this value fit in at most two bytes.
const MAX_TWO_B: u32 = 0x800;
// Code points below this value fit in at most three bytes.
const MAX_THREE_B: u32 = 0x10000;

/*
    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/

/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding. Use [`char::MAX_LEN_UTF8`] instead.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding. Use [`char::MAX_LEN_UTF16`] instead.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;

/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
///
/// The tuple has three `u8` components.
#[stable(feature = "unicode_version", since = "1.45.0")]
pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;

/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
///
/// `iter` may be anything that converts into an iterator of `u16` code units.
#[stable(feature = "decode_utf16", since = "1.9.0")]
#[inline]
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
    // Thin wrapper: the decoding iterator is implemented in the private
    // `decode` submodule.
    self::decode::decode_utf16(iter)
}

/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
///
/// The result is an `Option`, as the conversion can fail.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_u32(i: u32) -> Option<char> {
    // Thin wrapper: the conversion is implemented in the private `convert`
    // submodule.
    self::convert::from_u32(i)
}

/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`]
/// instead.
///
/// # Safety
///
/// This function performs no validity check of its own. The caller must uphold
/// the safety contract documented on [`char::from_u32_unchecked`].
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
#[rustc_const_stable(feature = "const_char_from_u32_unchecked", since = "1.81.0")]
#[must_use]
#[inline]
pub const unsafe fn from_u32_unchecked(i: u32) -> char {
    // Thin wrapper around the implementation in the private `convert`
    // submodule.
    // SAFETY: the safety contract must be upheld by the caller.
    unsafe { self::convert::from_u32_unchecked(i) }
}

/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
///
/// The result is an `Option`, as the conversion can fail.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
#[must_use]
#[inline]
pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
    // Thin wrapper: the conversion is implemented in the private `convert`
    // submodule.
    self::convert::from_digit(num, radix)
}

/// An iterator over the hexadecimal Unicode escape of a character, yielded
/// one `char` at a time.
///
/// Values of this type are produced by the [`escape_unicode`] method on
/// [`char`]; refer to that method's documentation for details.
///
/// [`escape_unicode`]: char::escape_unicode
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeUnicode(escape::EscapeIterInner<10>);

impl EscapeUnicode {
    /// Wraps the inner iterator built by `EscapeIterInner::unicode` for `c`.
    #[inline]
    const fn new(c: char) -> Self {
        EscapeUnicode(escape::EscapeIterInner::unicode(c))
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeUnicode {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        // Pull the next unit from the front and widen it to a `char`.
        let unit = self.0.next()?;
        Some(char::from(unit))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The remaining length is known exactly, so both bounds coincide.
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }

    #[inline]
    fn last(mut self) -> Option<char> {
        // The final item is read directly from the back.
        let unit = self.0.next_back()?;
        Some(char::from(unit))
    }

    #[inline]
    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_by(n)
    }
}

#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeUnicode {
    #[inline]
    fn len(&self) -> usize {
        self.0.len()
    }
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeUnicode {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeUnicode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Writes the inner iterator's current string view in one call.
        let pending = self.0.as_str();
        f.write_str(pending)
    }
}

/// An iterator over the literal escape code of a `char`.
///
/// Values of this type are produced by the [`escape_default`] method on
/// [`char`]; refer to that method's documentation for details.
///
/// [`escape_default`]: char::escape_default
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeDefault(escape::EscapeIterInner<10>);

impl EscapeDefault {
    /// Wraps the inner iterator built by `EscapeIterInner::ascii` from the
    /// byte value of `c`.
    #[inline]
    const fn printable(c: ascii::Char) -> Self {
        let byte = c.to_u8();
        EscapeDefault(escape::EscapeIterInner::ascii(byte))
    }

    /// Wraps the inner iterator built by `EscapeIterInner::backslash` for `c`.
    #[inline]
    const fn backslash(c: ascii::Char) -> Self {
        EscapeDefault(escape::EscapeIterInner::backslash(c))
    }

    /// Wraps the inner iterator built by `EscapeIterInner::unicode` for `c`.
    #[inline]
    const fn unicode(c: char) -> Self {
        EscapeDefault(escape::EscapeIterInner::unicode(c))
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeDefault {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        // Pull the next unit from the front and widen it to a `char`.
        let unit = self.0.next()?;
        Some(char::from(unit))
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The remaining length is known exactly, so both bounds coincide.
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }

    #[inline]
    fn last(mut self) -> Option<char> {
        // The final item is read directly from the back.
        let unit = self.0.next_back()?;
        Some(char::from(unit))
    }

    #[inline]
    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_by(n)
    }
}

#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeDefault {
    #[inline]
    fn len(&self) -> usize {
        self.0.len()
    }
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDefault {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeDefault {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Writes the inner iterator's current string view in one call.
        let pending = self.0.as_str();
        f.write_str(pending)
    }
}

/// An iterator over the literal escape code of a `char`.
///
/// Values of this type are produced by the [`escape_debug`] method on
/// [`char`]; refer to that method's documentation for details.
///
/// [`escape_debug`]: char::escape_debug
#[stable(feature = "char_escape_debug", since = "1.20.0")]
#[derive(Clone, Debug)]
pub struct EscapeDebug(EscapeDebugInner);

#[derive(Clone, Debug)]
// Note: the `Char` case could instead be packed by hand into
// `EscapeIterInner` (e.g. with alive=254..255 signalling that data[0..4]
// stores a char), which would probably yield better-optimised code. The
// two-variant enum is used for now because it is easier to implement.
enum EscapeDebugInner {
    Bytes(escape::EscapeIterInner<10>),
    Char(char),
}

impl EscapeDebug {
    /// Wraps a character that is yielded as-is, as a single item.
    #[inline]
    const fn printable(chr: char) -> Self {
        EscapeDebug(EscapeDebugInner::Char(chr))
    }

    /// Wraps the inner iterator built by `EscapeIterInner::backslash` for `c`.
    #[inline]
    const fn backslash(c: ascii::Char) -> Self {
        let inner = escape::EscapeIterInner::backslash(c);
        EscapeDebug(EscapeDebugInner::Bytes(inner))
    }

    /// Wraps the inner iterator built by `EscapeIterInner::unicode` for `c`.
    #[inline]
    const fn unicode(c: char) -> Self {
        let inner = escape::EscapeIterInner::unicode(c);
        EscapeDebug(EscapeDebugInner::Bytes(inner))
    }

    /// Switches the state to the iterator built by `EscapeIterInner::empty`.
    #[inline]
    fn clear(&mut self) {
        let drained = escape::EscapeIterInner::empty();
        self.0 = EscapeDebugInner::Bytes(drained);
    }
}

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl Iterator for EscapeDebug {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        match self.0 {
            // A stored character is handed out exactly once: the state is
            // reset before returning it.
            EscapeDebugInner::Char(chr) => {
                self.clear();
                Some(chr)
            }
            EscapeDebugInner::Bytes(ref mut bytes) => Some(char::from(bytes.next()?)),
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The remaining length is known exactly, so both bounds coincide.
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }
}

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl ExactSizeIterator for EscapeDebug {
    fn len(&self) -> usize {
        match self.0 {
            // A stored character accounts for exactly one pending item.
            EscapeDebugInner::Char(_) => 1,
            EscapeDebugInner::Bytes(ref bytes) => bytes.len(),
        }
    }
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDebug {}

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl fmt::Display for EscapeDebug {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.0 {
            EscapeDebugInner::Char(chr) => f.write_char(chr),
            EscapeDebugInner::Bytes(ref bytes) => f.write_str(bytes.as_str()),
        }
    }
}

// Declares a public newtype around `CaseMappingIter` under the given name,
// with the given attributes (doc comments included) attached, plus the trait
// impls for it. Every generated method hands the call straight to the wrapped
// `CaseMappingIter`.
macro_rules! casemappingiter_impls {
    ($(#[$meta:meta])* $Iter:ident) => {
        $(#[$meta])*
        #[stable(feature = "rust1", since = "1.0.0")]
        #[derive(Debug, Clone)]
        pub struct $Iter(CaseMappingIter);

        #[stable(feature = "rust1", since = "1.0.0")]
        impl Iterator for $Iter {
            type Item = char;

            fn next(&mut self) -> Option<char> {
                self.0.next()
            }

            fn size_hint(&self) -> (usize, Option<usize>) {
                self.0.size_hint()
            }

            fn fold<B, F>(self, seed: B, step: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                let Self(inner) = self;
                inner.fold(seed, step)
            }

            fn count(self) -> usize {
                let Self(inner) = self;
                inner.count()
            }

            fn last(self) -> Option<Self::Item> {
                let Self(inner) = self;
                inner.last()
            }

            fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_by(n)
            }

            unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
                // SAFETY: the caller's obligations are passed on unchanged
                unsafe { self.0.__iterator_get_unchecked(idx) }
            }
        }

        #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
        impl DoubleEndedIterator for $Iter {
            fn next_back(&mut self) -> Option<char> {
                self.0.next_back()
            }

            fn rfold<B, F>(self, seed: B, step: F) -> B
            where
                F: FnMut(B, Self::Item) -> B,
            {
                let Self(inner) = self;
                inner.rfold(seed, step)
            }

            fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
                self.0.advance_back_by(n)
            }
        }

        #[stable(feature = "fused", since = "1.26.0")]
        impl FusedIterator for $Iter {}

        #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
        impl ExactSizeIterator for $Iter {
            fn len(&self) -> usize {
                self.0.len()
            }

            fn is_empty(&self) -> bool {
                self.0.is_empty()
            }
        }

        // SAFETY: forwards to inner `array::IntoIter`
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for $Iter {}

        // SAFETY: forwards to inner `array::IntoIter`
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccessNoCoerce for $Iter {
            const MAY_HAVE_SIDE_EFFECT: bool = false;
        }

        // SAFETY: this iter has no subtypes/supertypes
        #[doc(hidden)]
        #[unstable(feature = "std_internals", issue = "none")]
        unsafe impl TrustedRandomAccess for $Iter {}

        #[stable(feature = "char_struct_display", since = "1.16.0")]
        impl fmt::Display for $Iter {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(inner) = self;
                fmt::Display::fmt(inner, f)
            }
        }
    }
}

casemappingiter_impls! {
    /// An iterator that yields the lowercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_lowercase`]: char::to_lowercase
    ToLowercase
}

casemappingiter_impls! {
    /// An iterator that yields the uppercase equivalent of a `char`.
    ///
    /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
    /// its documentation for more.
    ///
    /// [`to_uppercase`]: char::to_uppercase
    ToUppercase
}

/// Iterator over the `char`s of a three-slot array, with trailing `'\0'`
/// slots dropped at construction time.
#[derive(Debug, Clone)]
struct CaseMappingIter(core::array::IntoIter<char, 3>);

impl CaseMappingIter {
    /// Builds the iterator from `chars`, dropping trailing `'\0'` slots so
    /// that they are never yielded.
    #[inline]
    fn new(chars: [char; 3]) -> CaseMappingIter {
        let mut iter = chars.into_iter();
        match chars {
            // Both trailing slots are padding. `chars[0]` is deliberately
            // not checked, as '\0' lowercases to itself.
            [_, '\0', '\0'] => {
                iter.next_back();
                iter.next_back();
            }
            // Only the last slot is padding.
            [_, _, '\0'] => {
                iter.next_back();
            }
            _ => {}
        }
        CaseMappingIter(iter)
    }
}

// Every method below hands the call straight to the wrapped
// `array::IntoIter`.
impl Iterator for CaseMappingIter {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        self.0.next()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    fn fold<B, F>(self, seed: B, step: F) -> B
    where
        F: FnMut(B, Self::Item) -> B,
    {
        let Self(inner) = self;
        inner.fold(seed, step)
    }

    fn count(self) -> usize {
        let Self(inner) = self;
        inner.count()
    }

    fn last(self) -> Option<Self::Item> {
        let Self(inner) = self;
        inner.last()
    }

    fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_by(n)
    }

    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
        // SAFETY: the caller's obligations are passed on unchanged
        unsafe { self.0.__iterator_get_unchecked(idx) }
    }
}

impl DoubleEndedIterator for CaseMappingIter {
    fn next_back(&mut self) -> Option<char> {
        self.0.next_back()
    }

    fn rfold<B, F>(self, seed: B, step: F) -> B
    where
        F: FnMut(B, Self::Item) -> B,
    {
        let Self(inner) = self;
        inner.rfold(seed, step)
    }

    fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
        self.0.advance_back_by(n)
    }
}

impl ExactSizeIterator for CaseMappingIter {
    fn len(&self) -> usize {
        self.0.len()
    }

    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}

impl FusedIterator for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
unsafe impl TrustedLen for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
    const MAY_HAVE_SIDE_EFFECT: bool = false;
}

// SAFETY: `CaseMappingIter` has no subtypes/supertypes
unsafe impl TrustedRandomAccess for CaseMappingIter {}

impl fmt::Display for CaseMappingIter {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Iterate over a clone so that formatting leaves `self` untouched;
        // the first write error stops the walk and is returned.
        self.0.clone().try_for_each(|c| f.write_char(c))
    }
}

/// The error type returned when a checked char conversion fails.
///
/// It carries no data beyond the fact that the conversion failed.
#[stable(feature = "u8_from_char", since = "1.59.0")]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct TryFromCharError(pub(crate) ());

#[stable(feature = "u8_from_char", since = "1.59.0")]
impl fmt::Display for TryFromCharError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Go through `str`'s `Display` impl rather than `write_str`, so the
        // message is formatted exactly as a string argument would be.
        fmt::Display::fmt("unicode code point out of range", f)
    }
}

#[stable(feature = "u8_from_char", since = "1.59.0")]
impl Error for TryFromCharError {}