//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use the `render_with_highlighting` to highlight some rust code.

use std::borrow::Cow;
use std::collections::VecDeque;
use std::fmt::{self, Display, Write};

use rustc_data_structures::fx::FxIndexMap;
use rustc_lexer::{Cursor, FrontmatterAllowed, LiteralKind, TokenKind};
use rustc_span::edition::Edition;
use rustc_span::symbol::Symbol;
use rustc_span::{BytePos, DUMMY_SP, Span};

use super::format::{self, write_str};
use crate::clean::PrimitiveType;
use crate::html::escape::EscapeBodyText;
use crate::html::macro_expansion::ExpandedCode;
use crate::html::render::{Context, LinkFromSrc};

/// Context needed to render links on items so that readers can jump to their definition.
///
/// `write_code` takes it as an `Option`: when it is `None`, no link to an item definition is
/// generated.
pub(crate) struct HrefContext<'a, 'tcx> {
    /// The rendering context; `write_code` reads the recorded macro expansions from it.
    pub(crate) context: &'a Context<'tcx>,
    /// This span contains the current file we're going through.
    pub(crate) file_span: Span,
    /// This field is used to know "how far" from the top of the directory we are to link to either
    /// documentation pages or other source pages.
    pub(crate) root_path: &'a str,
    /// This field is used to calculate precise local URLs.
    pub(crate) current_href: String,
}

/// Decorations are represented as a map from CSS class to vector of character ranges.
/// Each range is a `(start, end)` pair and will be wrapped in a span with that class.
#[derive(Default)]
pub(crate) struct DecorationInfo(pub(crate) FxIndexMap<&'static str, Vec<(u32, u32)>>);

/// The kind of informational tooltip displayed next to a rendered code example.
#[derive(Eq, PartialEq, Clone)]
pub(crate) enum Tooltip {
    /// The example is not tested at all.
    IgnoreAll,
    /// The example is not tested on the listed platforms (the list must not be empty when the
    /// header is rendered).
    IgnoreSome(Vec<String>),
    /// The example deliberately fails to compile.
    CompileFail,
    /// The example panics.
    ShouldPanic,
    /// The example runs with the given edition.
    Edition(Edition),
    /// No tooltip is rendered.
    None,
}

/// Renders `src` as an inline example and appends the resulting HTML to `out`.
///
/// The output is made of the header (wrapper, optional tooltip, `<pre><code>`), the highlighted
/// code and the footer (closing tags plus the optional playground button).
pub(crate) fn render_example_with_highlighting(
    src: &str,
    out: &mut String,
    tooltip: Tooltip,
    playground_button: Option<&str>,
    extra_classes: &[String],
) {
    // Inline examples have no extra header content, no links to definitions, no decorations
    // and no line numbers.
    let (extra_content, href_context, decoration_info, line_info) = (None, None, None, None);

    write_header(out, "rust-example-rendered", extra_content, tooltip, extra_classes);
    write_code(out, src, href_context, decoration_info, line_info);
    write_footer(out, playground_button);
}

/// Writes everything that precedes the highlighted code into `out`.
///
/// In order: the `example-wrap` `<div>` (with a class depending on `tooltip`), the tooltip link
/// (unless `tooltip` is `Tooltip::None`), `extra_content` if any, and finally the opening
/// `<pre>`/`<code>` tags carrying `class` and `extra_classes`.
fn write_header(
    out: &mut String,
    class: &str,
    extra_content: Option<&str>,
    tooltip: Tooltip,
    extra_classes: &[String],
) {
    let wrapper_suffix = match &tooltip {
        Tooltip::None => "",
        Tooltip::IgnoreAll | Tooltip::IgnoreSome(_) => " ignore",
        Tooltip::CompileFail => " compile_fail",
        Tooltip::ShouldPanic => " should_panic",
        Tooltip::Edition(_) => " edition",
    };
    write_str(out, format_args!("<div class=\"example-wrap{wrapper_suffix}\">"));

    if !matches!(tooltip, Tooltip::None) {
        // The title is rendered lazily, straight into the output.
        let title = fmt::from_fn(|f| match &tooltip {
            Tooltip::None => unreachable!(),
            Tooltip::IgnoreAll => f.write_str("This example is not tested"),
            Tooltip::CompileFail => f.write_str("This example deliberately fails to compile"),
            Tooltip::ShouldPanic => f.write_str("This example panics"),
            Tooltip::Edition(edition) => write!(f, "This example runs with edition {edition}"),
            Tooltip::IgnoreSome(platforms) => {
                f.write_str("This example is not tested on ")?;
                match platforms.as_slice() {
                    [] => unreachable!(),
                    [only] => f.write_str(only),
                    [first, second] => write!(f, "{first} or {second}"),
                    [leading @ .., last] => {
                        for platform in leading {
                            write!(f, "{platform}, ")?;
                        }
                        write!(f, "or {last}")
                    }
                }
            }
        });
        write_str(out, format_args!("<a href=\"#\" class=\"tooltip\" title=\"{title}\">ⓘ</a>"));
    }

    out.push_str(extra_content.unwrap_or_default());

    // The extra classes are only preceded by a space when there is at least one of them.
    let separator = if extra_classes.is_empty() { "" } else { " " };
    let joined_extra = extra_classes.join(" ");
    if class.is_empty() {
        write_str(out, format_args!("<pre class=\"rust{separator}{joined_extra}\">"));
    } else {
        write_str(out, format_args!("<pre class=\"rust {class}{separator}{joined_extra}\">"));
    }
    write_str(out, format_args!("<code>"));
}

/// Tells whether content of class `class2` (whose text is `text`) can be grouped with content of
/// class `class1`. "Unclassified" means `None` below.
///
/// * Two unclassified contents can always be merged.
/// * Two classified contents can be merged if `Class::is_equal_to` says so.
/// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated
///   CSS class), so it merges with unclassified content.
/// * A `Class::Macro` as `class1` never merges with unclassified content.
/// * Any other classified/unclassified pair can be merged only if `text` is made of white
///   characters (backline, whitespace, etc).
fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
    match (class1, class2) {
        (None, None) => true,
        (Some(left), Some(right)) => left.is_equal_to(right),
        (Some(classified), None) | (None, Some(classified)) => match classified {
            Class::Ident(_) => true,
            // Only a macro on the left-hand side refuses the merge.
            Class::Macro(_) if class1.is_some() => false,
            _ => text.trim().is_empty(),
        },
    }
}

/// This type is used as a convenience to prevent having to pass all its fields as arguments into
/// the various functions (which became its methods).
///
/// Pending content is flushed when the handler is dropped.
struct TokenHandler<'a, 'tcx, F: Write> {
    /// Where the generated HTML is written.
    out: &'a mut F,
    /// It contains the closing tag and the associated `Class`.
    closing_tags: Vec<(&'static str, Class)>,
    /// This is used because we don't automatically generate the closing tag on `ExitSpan` in
    /// case an `EnterSpan` event with the same class follows.
    pending_exit_span: Option<Class>,
    /// `current_class` and `pending_elems` are used to group HTML elements with same `class`
    /// attributes to reduce the DOM size.
    current_class: Option<Class>,
    /// We need to keep the `Class` for each element because it could contain a `Span` which is
    /// used to generate links.
    pending_elems: Vec<(Cow<'a, str>, Option<Class>)>,
    /// When `None`, no link to item definitions is generated.
    href_context: Option<HrefContext<'a, 'tcx>>,
    /// Function used to emit line numbers. It receives the output, the line number and some
    /// extra text to write along with it.
    write_line_number: fn(&mut F, u32, &'static str),
}

impl<F: Write> std::fmt::Debug for TokenHandler<'_, '_, F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("TokenHandler")
            .field("closing_tags", &self.closing_tags)
            .field("pending_exit_span", &self.pending_exit_span)
            .field("current_class", &self.current_class)
            .field("pending_elems", &self.pending_elems)
            .finish()
    }
}

impl<F: Write> TokenHandler<'_, '_, F> {
    /// Flushes the pending elements and closes the most recently opened span.
    ///
    /// Panics if no span is currently open.
    fn handle_exit_span(&mut self) {
        // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is
        // being used in `write_pending_elems`.
        let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1;
        // We flush everything just in case...
        self.write_pending_elems(Some(class));

        // Now that the content is written, the tag can be removed and closed.
        exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0);
        self.pending_exit_span = None;
    }

    /// Write all the pending elements sharing the same (or a mergeable) `Class`.
    ///
    /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged
    /// with the elements' class, then we simply write the elements since the `ExitSpan` event will
    /// close the tag.
    ///
    /// Otherwise, if there is only one pending element, we let the `string` function handle both
    /// opening and closing the tag, otherwise we do it into this function.
    ///
    /// It returns `true` if something was written (in which case `current_class` must be set to
    /// `None` afterwards) and `false` if there was no pending element.
    fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
        if self.pending_elems.is_empty() {
            return false;
        }
        if let Some((_, parent_class)) = self.closing_tags.last()
            && can_merge(current_class, Some(*parent_class), "")
        {
            // The parent span already provides the class: just write the content.
            for (text, class) in self.pending_elems.iter() {
                string(
                    self.out,
                    EscapeBodyText(text),
                    *class,
                    &self.href_context,
                    false,
                    self.write_line_number,
                );
            }
        } else {
            // We only want to "open" the tag ourselves if we have more than one pending and if the
            // current parent tag is not the same as our pending content.
            let close_tag = if self.pending_elems.len() > 1
                && let Some(current_class) = current_class
                // `PreludeTy` can never include more than an ident so it should not generate
                // a wrapping `span`.
                && !matches!(current_class, Class::PreludeTy(_))
            {
                Some(enter_span(self.out, current_class, &self.href_context))
            } else {
                None
            };
            // To prevent opening a macro expansion span being closed right away because
            // the currently open item is replaced by a new class.
            let last_pending =
                self.pending_elems.pop_if(|(_, class)| *class == Some(Class::Expansion));
            for (text, class) in self.pending_elems.iter() {
                string(
                    self.out,
                    EscapeBodyText(text),
                    *class,
                    &self.href_context,
                    close_tag.is_none(),
                    self.write_line_number,
                );
            }
            if let Some(close_tag) = close_tag {
                exit_span(self.out, close_tag);
            }
            // The trailing expansion element (if any) is written after the wrapping span was
            // closed.
            if let Some((text, class)) = last_pending {
                string(
                    self.out,
                    EscapeBodyText(&text),
                    class,
                    &self.href_context,
                    close_tag.is_none(),
                    self.write_line_number,
                );
            }
        }
        self.pending_elems.clear();
        true
    }

    /// Calls the line number callback stored in `self.write_line_number` on `self.out`.
    #[inline]
    fn write_line_number(&mut self, line: u32, extra: &'static str) {
        (self.write_line_number)(self.out, line, extra);
    }
}

impl<F: Write> Drop for TokenHandler<'_, '_, F> {
    /// Flushes whatever is still pending so that no content goes missing: either the span
    /// waiting to be closed, or the pending elements of the current class.
    fn drop(&mut self) {
        match self.pending_exit_span {
            Some(_) => self.handle_exit_span(),
            None => {
                self.write_pending_elems(self.current_class);
            }
        }
    }
}

/// Writes `extra` followed by `line` wrapped in a `<span data-nosnippet>`.
///
/// Panics if writing into `out` fails.
fn write_scraped_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // `data-nosnippet` keeps "1 2 3 4 5 ..." out of web search results, see
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    let outcome = out.write_fmt(format_args!("{extra}<span data-nosnippet>{line}</span>"));
    outcome.unwrap();
}

/// Writes `extra` followed by `line` rendered as an anchor which links to itself (`href` and `id`
/// are both the line number).
///
/// Panics if writing into `out` fails.
fn write_line_number(out: &mut impl Write, line: u32, extra: &'static str) {
    // `data-nosnippet` keeps "1 2 3 4 5 ..." out of web search results, see
    // https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#data-nosnippet-attr
    let outcome =
        out.write_fmt(format_args!("{extra}<a href=#{line} id={line} data-nosnippet>{line}</a>"));
    outcome.unwrap();
}

/// Line number writer used when no line number must be displayed: only `extra` is written.
///
/// Panics if writing into `out` fails.
fn empty_line_number(out: &mut impl Write, _line: u32, extra: &'static str) {
    let outcome = out.write_str(extra);
    outcome.unwrap();
}

/// Returns the first expansion which starts on `line` and whose span begins after the beginning
/// of `span`, if any.
fn get_next_expansion(
    expanded_codes: &[ExpandedCode],
    line: u32,
    span: Span,
) -> Option<&ExpandedCode> {
    let lower_bound = span.lo();
    for candidate in expanded_codes {
        if candidate.start_line == line && candidate.span.lo() > lower_bound {
            return Some(candidate);
        }
    }
    None
}

/// Looks for a macro expansion starting on `line` after `span` and, if there is one, queues the
/// opening of its `expansion` wrapper (with the checkbox used to collapse/expand it).
///
/// If a span with a CSS class is currently open, it is closed before the wrapper and re-opened
/// right after the checkbox so that the wrapper is not nested inside it.
///
/// Returns the expansion which was found, or `None` (in which case nothing is queued).
fn get_expansion<'a, W: Write>(
    token_handler: &mut TokenHandler<'_, '_, W>,
    expanded_codes: &'a [ExpandedCode],
    line: u32,
    span: Span,
) -> Option<&'a ExpandedCode> {
    let expanded_code = get_next_expansion(expanded_codes, line, span)?;

    let (closing, reopening) = if let Some(current_class) = token_handler.current_class
        && let class = current_class.as_html()
        && !class.is_empty()
    {
        ("</span>", format!("<span class=\"{class}\">"))
    } else {
        ("", String::new())
    };
    let id = format!("expand-{line}");
    // The `title` attribute used to be written as `title=""Collapse/expand macro"`, which
    // browsers parse as an empty title followed by two bogus attributes.
    token_handler.pending_elems.push((
        Cow::Owned(format!(
            "{closing}\
<span class=expansion>\
    <input id={id} \
           tabindex=0 \
           type=checkbox \
           aria-label=\"Collapse/expand macro\" \
           title=\"Collapse/expand macro\">{reopening}",
        )),
        Some(Class::Expansion),
    ));
    Some(expanded_code)
}

/// Queues the expanded code (in an `expanded` span) followed by the opening of the `original`
/// span which will contain the code as written in the source.
fn start_expansion(out: &mut Vec<(Cow<'_, str>, Option<Class>)>, expanded_code: &ExpandedCode) {
    let expanded = &expanded_code.code;
    let markup = format!("<span class=expanded>{expanded}</span><span class=original>");
    out.push((Cow::Owned(markup), Some(Class::Expansion)));
}

/// Queues the HTML which ends the current macro expansion.
///
/// If another expansion follows on the same `line`, only the "original" span is closed and that
/// expansion is returned. Otherwise the whole expansion wrapper is closed and `None` is returned;
/// the tags which differ between `expansion_start_tags` and the currently open tags are closed and
/// spans are re-opened after the wrapper.
fn end_expansion<'a, W: Write>(
    token_handler: &mut TokenHandler<'_, '_, W>,
    expanded_codes: &'a [ExpandedCode],
    expansion_start_tags: &[(&'static str, Class)],
    line: u32,
    span: Span,
) -> Option<&'a ExpandedCode> {
    let marker = Some(Class::Expansion);

    if let Some(next_expansion) = get_next_expansion(expanded_codes, line, span) {
        // We close the current "original" content.
        token_handler.pending_elems.push((Cow::Borrowed("</span>"), marker));
        return Some(next_expansion);
    }
    if expansion_start_tags.is_empty() && token_handler.closing_tags.is_empty() {
        // No tag is open so we can just close the expansion.
        token_handler.pending_elems.push((Cow::Borrowed("</span></span>"), marker));
        return None;
    }

    // If tags were opened inside the expansion, we need to close them and re-open them outside
    // of the expansion span. Tags shared by both lists (their common prefix) are left alone.
    let shared = token_handler
        .closing_tags
        .iter()
        .zip(expansion_start_tags)
        .take_while(|(open, start)| open == start)
        .count();

    let mut closers = String::new();
    let mut reopeners = String::new();
    let differing =
        expansion_start_tags[shared..].iter().chain(&token_handler.closing_tags[shared..]);
    for (tag, class) in differing {
        closers.push_str(tag);
        reopeners.push_str(&format!("<span class=\"{}\">", class.as_html()));
    }

    token_handler
        .pending_elems
        .push((Cow::Owned(format!("</span></span>{closers}{reopeners}")), marker));
    None
}

/// Line numbering information given to `write_code`.
#[derive(Clone, Copy)]
pub(super) struct LineInfo {
    /// Number written for the first line.
    pub(super) start_line: u32,
    /// Exclusive upper bound: backlines are only emitted while the line counter is below it.
    max_lines: u32,
    /// Whether the line numbers are rendered for a scraped example.
    pub(super) is_scraped_example: bool,
}

impl LineInfo {
    /// Line information for `max_lines` lines, numbered from 1.
    pub(super) fn new(max_lines: u32) -> Self {
        let start_line = 1;
        let max_lines = max_lines + 1;
        Self { start_line, max_lines, is_scraped_example: false }
    }

    /// Line information for a scraped example of `max_lines` lines whose first line is numbered
    /// `start_line + 1`.
    pub(super) fn new_scraped(max_lines: u32, start_line: u32) -> Self {
        let first_line = start_line + 1;
        Self {
            start_line: first_line,
            max_lines: max_lines + first_line,
            is_scraped_example: true,
        }
    }
}

/// Convert the given `src` source code into HTML by adding classes for highlighting.
///
/// This code is used to render code blocks (in the documentation) as well as the source code pages.
///
/// Some explanations on the last arguments:
///
/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
/// item definition.
///
/// `decoration_info` is forwarded to the `Classifier`. `line_info` selects how line numbers are
/// written: nothing is written when it is `None`.
///
/// If macro expansions were recorded for the file of `href_context`, the expanded code is emitted
/// alongside the original code, inside `expansion` spans.
pub(super) fn write_code(
    out: &mut impl Write,
    src: &str,
    href_context: Option<HrefContext<'_, '_>>,
    decoration_info: Option<&DecorationInfo>,
    line_info: Option<LineInfo>,
) {
    // This replace allows to fix how the code source with DOS backline characters is displayed.
    let src = src.replace("\r\n", "\n");
    // Pending content is flushed when `token_handler` is dropped at the end of this function.
    let mut token_handler = TokenHandler {
        out,
        closing_tags: Vec::new(),
        pending_exit_span: None,
        current_class: None,
        pending_elems: Vec::with_capacity(20),
        href_context,
        write_line_number: match line_info {
            Some(line_info) => {
                if line_info.is_scraped_example {
                    write_scraped_line_number
                } else {
                    write_line_number
                }
            }
            None => empty_line_number,
        },
    };

    // The first line number is written right away, the following ones are written along with
    // each backline.
    let (mut line, max_lines) = if let Some(line_info) = line_info {
        token_handler.write_line_number(line_info.start_line, "");
        (line_info.start_line, line_info.max_lines)
    } else {
        (0, u32::MAX)
    };

    // Macro expansions recorded for the current file (none if there is no `href_context`).
    let (expanded_codes, file_span) = match token_handler.href_context.as_ref().and_then(|c| {
        let expanded_codes = c.context.shared.expanded_codes.get(&c.file_span.lo())?;
        Some((expanded_codes, c.file_span))
    }) {
        Some((expanded_codes, file_span)) => (expanded_codes.as_slice(), file_span),
        None => (&[] as &[ExpandedCode], DUMMY_SP),
    };
    // An expansion can start on the very first line.
    let mut current_expansion = get_expansion(&mut token_handler, expanded_codes, line, file_span);
    token_handler.write_pending_elems(None);
    // Tags which were open when the current expansion was started.
    let mut expansion_start_tags = Vec::new();

    Classifier::new(
        &src,
        token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
        decoration_info,
    )
    .highlight(&mut |span, highlight| {
        match highlight {
            Highlight::Token { text, class } => {
                // If we received a `ExitSpan` event and then have a non-compatible `Class`, we
                // need to close the `<span>`.
                let need_current_class_update = if let Some(pending) =
                    token_handler.pending_exit_span
                    && !can_merge(Some(pending), class, text)
                {
                    token_handler.handle_exit_span();
                    true
                // If the two `Class` are different, time to flush the current content and start
                // a new one.
                } else if !can_merge(token_handler.current_class, class, text) {
                    token_handler.write_pending_elems(token_handler.current_class);
                    true
                } else {
                    token_handler.current_class.is_none()
                };

                if need_current_class_update {
                    // The span is replaced with a dummy one, see `Class::dummy`.
                    token_handler.current_class = class.map(Class::dummy);
                }
                if text == "\n" {
                    line += 1;
                    // Backlines past `max_lines` are not emitted.
                    if line < max_lines {
                        token_handler
                            .pending_elems
                            .push((Cow::Borrowed(text), Some(Class::Backline(line))));
                    }
                    // A new line might be the start of a new expansion.
                    if current_expansion.is_none() {
                        current_expansion =
                            get_expansion(&mut token_handler, expanded_codes, line, span);
                        expansion_start_tags = token_handler.closing_tags.clone();
                    }
                    if let Some(ref current_expansion) = current_expansion
                        && current_expansion.span.lo() == span.hi()
                    {
                        start_expansion(&mut token_handler.pending_elems, current_expansion);
                    }
                } else {
                    token_handler.pending_elems.push((Cow::Borrowed(text), class));

                    let mut need_end = false;
                    if let Some(ref current_expansion) = current_expansion {
                        if current_expansion.span.lo() == span.hi() {
                            // The expansion starts right after this token.
                            start_expansion(&mut token_handler.pending_elems, current_expansion);
                        } else if current_expansion.end_line == line
                            && span.hi() >= current_expansion.span.hi()
                        {
                            // This token reaches the end of the expansion.
                            need_end = true;
                        }
                    }
                    if need_end {
                        current_expansion = end_expansion(
                            &mut token_handler,
                            expanded_codes,
                            &expansion_start_tags,
                            line,
                            span,
                        );
                    }
                }
            }
            Highlight::EnterSpan { class } => {
                let mut should_add = true;
                if let Some(pending_exit_span) = token_handler.pending_exit_span {
                    if class.is_equal_to(pending_exit_span) {
                        // The span which was about to be closed is simply kept open.
                        should_add = false;
                    } else {
                        token_handler.handle_exit_span();
                    }
                } else {
                    // We flush everything just in case...
                    if token_handler.write_pending_elems(token_handler.current_class) {
                        token_handler.current_class = None;
                    }
                }
                if should_add {
                    let closing_tag =
                        enter_span(token_handler.out, class, &token_handler.href_context);
                    token_handler.closing_tags.push((closing_tag, class));
                }

                token_handler.current_class = None;
                token_handler.pending_exit_span = None;
            }
            Highlight::ExitSpan => {
                // The span is not closed right away in case an `EnterSpan` event with the same
                // class follows.
                token_handler.current_class = None;
                token_handler.pending_exit_span = Some(
                    token_handler
                        .closing_tags
                        .last()
                        .as_ref()
                        .expect("ExitSpan without EnterSpan")
                        .1,
                );
            }
        };
    });
}

/// Closes the `<code>` and `<pre>` tags, writes the playground button (if any) and closes the
/// wrapping `<div>`.
fn write_footer(out: &mut String, playground_button: Option<&str>) {
    let button = playground_button.unwrap_or_default();
    write_str(out, format_args!("</code></pre>{button}</div>"));
}

/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// The CSS class associated with each variant is given by `Class::as_html`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    Comment,
    DocComment,
    Attribute,
    KeyWord,
    /// Keywords that do pointer/reference stuff.
    RefKeyWord,
    /// The `Span` is used to generate a link, like for the other variants containing one.
    Self_(Span),
    Macro(Span),
    MacroNonTerminal,
    String,
    Number,
    Bool,
    /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
    Ident(Span),
    Lifetime,
    PreludeTy(Span),
    PreludeVal(Span),
    QuestionMark,
    /// A decoration; the string is the CSS class to use.
    Decoration(&'static str),
    /// A backline, along with the number of the line it starts.
    Backline(u32),
    /// Macro expansion.
    Expansion,
}

impl Class {
    /// Tells whether two classes can be considered the same when grouping HTML elements.
    ///
    /// The `Span` of `Self_`, `Macro` and `Ident` is ignored; any other pair of classes is
    /// compared with `==`.
    ///
    /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
    /// multiple ones.
    fn is_equal_to(self, other: Self) -> bool {
        let span_can_be_ignored = matches!(
            (self, other),
            (Self::Self_(_), Self::Self_(_))
                | (Self::Macro(_), Self::Macro(_))
                | (Self::Ident(_), Self::Ident(_))
        );
        span_can_be_ignored || self == other
    }

    /// Replaces the `Span` of `Self_`, `Macro` and `Ident` with `DUMMY_SP` to prevent creating
    /// links on "empty content" (because of the attributes merge). Other variants are returned
    /// as is.
    fn dummy(self) -> Self {
        match self {
            Self::Ident(_) => Self::Ident(DUMMY_SP),
            Self::Macro(_) => Self::Macro(DUMMY_SP),
            Self::Self_(_) => Self::Self_(DUMMY_SP),
            unchanged => unchanged,
        }
    }

    /// Returns the css class expected by rustdoc for each `Class` (empty if there is none).
    fn as_html(self) -> &'static str {
        match self {
            // These ones have no CSS class of their own.
            Self::Ident(_) | Self::Backline(_) | Self::Expansion => "",
            Self::Decoration(kind) => kind,
            Self::Comment => "comment",
            Self::DocComment => "doccomment",
            Self::Attribute => "attr",
            Self::KeyWord => "kw",
            Self::RefKeyWord => "kw-2",
            Self::Self_(_) => "self",
            Self::Macro(_) => "macro",
            Self::MacroNonTerminal => "macro-nonterminal",
            Self::String => "string",
            Self::Number => "number",
            Self::Bool => "bool-val",
            Self::Lifetime => "lifetime",
            Self::PreludeTy(_) => "prelude-ty",
            Self::PreludeVal(_) => "prelude-val",
            Self::QuestionMark => "question-mark",
        }
    }

    /// Returns the `Span` stored in this class, if it is one which can be converted into a link
    /// to a definition.
    fn get_span(self) -> Option<Span> {
        match self {
            Self::Comment
            | Self::DocComment
            | Self::Attribute
            | Self::KeyWord
            | Self::RefKeyWord
            | Self::MacroNonTerminal
            | Self::String
            | Self::Number
            | Self::Bool
            | Self::Lifetime
            | Self::QuestionMark
            | Self::Decoration(_)
            | Self::Backline(_)
            | Self::Expansion => None,
            Self::Self_(span)
            | Self::Macro(span)
            | Self::Ident(span)
            | Self::PreludeTy(span)
            | Self::PreludeVal(span) => Some(span),
        }
    }
}

/// Events produced while highlighting and consumed by `write_code`.
#[derive(Debug)]
enum Highlight<'a> {
    /// Some source text with its (optional) class.
    Token { text: &'a str, class: Option<Class> },
    /// A span of the given class is opened; it stays open until the matching `ExitSpan`.
    EnterSpan { class: Class },
    /// The most recently opened span is closed.
    ExitSpan,
}

/// Iterator over the tokens of a source code, along with their text.
struct TokenIter<'a> {
    /// The part of the source which was not returned yet.
    src: &'a str,
    cursor: Cursor<'a>,
}

impl<'a> Iterator for TokenIter<'a> {
    type Item = (TokenKind, &'a str);

    /// Returns the next token and the text it covers, or `None` when reaching `Eof`.
    fn next(&mut self) -> Option<Self::Item> {
        let token = self.cursor.advance_token();
        match token.kind {
            TokenKind::Eof => None,
            kind => {
                // The token text is the beginning of what remains of the source.
                let (text, remaining) = self.src.split_at(token.len as usize);
                self.src = remaining;
                Some((kind, text))
            }
        }
    }
}

/// Returns the class of `text` if it is a keyword, `None` if it is a non-keyword identifier.
///
/// `self` and `Self` always give `None`; `super` and `crate` give `None` as well when
/// `allow_path_keywords` is `true`.
fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
    let is_ignored = match text {
        "self" | "Self" => true,
        "super" | "crate" => allow_path_keywords,
        _ => false,
    };
    if is_ignored {
        return None;
    }
    match text {
        "ref" | "mut" => Some(Class::RefKeyWord),
        "false" | "true" => Some(Class::Bool),
        _ => Symbol::intern(text).is_reserved(|| Edition::Edition2021).then_some(Class::KeyWord),
    }
}

/// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than
/// just the next item by using `peek_next`. The `peek` method always returns the next item after
/// the current one whereas `peek_next` will return the next item after the last one peeked.
///
/// You can use both `peek` and `peek_next` at the same time without problem.
struct PeekIter<'a> {
    stored: VecDeque<(TokenKind, &'a str)>,
    /// This position is reinitialized when using `next`. It is used in `peek_next`.
    peek_pos: usize,
    iter: TokenIter<'a>,
}

impl<'a> PeekIter<'a> {
    fn new(iter: TokenIter<'a>) -> Self {
        Self { stored: VecDeque::new(), peek_pos: 0, iter }
    }
    /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
    fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
        if self.stored.is_empty()
            && let Some(next) = self.iter.next()
        {
            self.stored.push_back(next);
        }
        self.stored.front()
    }
    /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
    fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
        self.peek_pos += 1;
        if self.peek_pos - 1 < self.stored.len() {
            self.stored.get(self.peek_pos - 1)
        } else if let Some(next) = self.iter.next() {
            self.stored.push_back(next);
            self.stored.back()
        } else {
            None
        }
    }
}

impl<'a> Iterator for PeekIter<'a> {
    type Item = (TokenKind, &'a str);
    fn next(&mut self) -> Option<Self::Item> {
        self.peek_pos = 0;
        if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
    }
}

/// Custom spans inserted into the source. E.g. `--scrape-examples` uses this to highlight
/// function calls.
struct Decorations {
    /// Start position and kind of each decoration, sorted by position.
    starts: Vec<(u32, &'static str)>,
    /// End position of each decoration, sorted.
    ends: Vec<u32>,
}

impl Decorations {
    /// Splits the `(start, end)` ranges of each kind into a sequence of `(start, kind)` and a
    /// sequence of `end`, both sorted in document order.
    fn new(info: &DecorationInfo) -> Self {
        let mut starts = Vec::new();
        let mut ends = Vec::new();
        for (&kind, ranges) in &info.0 {
            for &(lo, hi) in ranges {
                starts.push((lo, kind));
                ends.push(hi);
            }
        }

        // The sort is stable: decorations starting at the same position keep the map order.
        starts.sort_by_key(|&(lo, _)| lo);
        ends.sort();

        Decorations { starts, ends }
    }
}

/// Convenient wrapper to create a [`Span`] from a position in the file.
///
/// The span starts `lo` bytes after the beginning of `file_span` and is as long as `text`.
fn new_span(lo: u32, text: &str, file_span: Span) -> Span {
    let end = lo + text.len() as u32;
    let base = file_span.lo();
    let (span_lo, span_hi) = (base + BytePos(lo), base + BytePos(end));
    file_span.with_lo(span_lo).with_hi(span_hi)
}

/// Processes program tokens, classifying strings of text by highlighting
/// category (`Class`).
struct Classifier<'src> {
    /// Tokens of `src`, with the ability to peek more than one token ahead.
    tokens: PeekIter<'src>,
    // NOTE(review): the three flags below presumably track the kind of syntax currently being
    // processed; they all start as `false` in `Classifier::new`.
    in_attribute: bool,
    in_macro: bool,
    in_macro_nonterminal: bool,
    /// Current position (in bytes) in `src`; it starts at 0.
    byte_pos: u32,
    /// Span of the source file (`DUMMY_SP` when `write_code` has no `HrefContext`).
    file_span: Span,
    /// The source code being highlighted.
    src: &'src str,
    /// Decorations to insert, if any were provided.
    decorations: Option<Decorations>,
}

impl<'src> Classifier<'src> {
    /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
    /// file span which will be used later on by the `span_correspondence_map`.
    fn new(src: &'src str, file_span: Span, decoration_info: Option<&DecorationInfo>) -> Self {
        let tokens =
            PeekIter::new(TokenIter { src, cursor: Cursor::new(src, FrontmatterAllowed::Yes) });
        let decorations = decoration_info.map(Decorations::new);
        Classifier {
            tokens,
            in_attribute: false,
            in_macro: false,
            in_macro_nonterminal: false,
            byte_pos: 0,
            file_span,
            src,
            decorations,
        }
    }

    /// Concatenate colons and idents as one when possible.
    ///
    /// Consumes tokens from `self.tokens` for as long as they form a path made of identifiers
    /// separated by `::` (a leading `::` is accepted). `self.byte_pos` is not updated here, so
    /// the caller is expected to advance it by the length of every returned entry.
    ///
    /// Returns `(kind, start, end)` entries where `start..end` is a byte range in `self.src`:
    ///
    /// * a single `Ident` entry covering the whole path,
    /// * followed by a `Colon` entry when the colons consumed last could not be attached to
    ///   the path,
    /// * a lone `Colon` entry when only colons were consumed,
    /// * nothing at all when no token was consumed.
    fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
        let start = self.byte_pos as usize;
        // End (exclusive) of the path accumulated so far.
        let mut pos = start;
        let mut has_ident = false;

        loop {
            // Number of consecutive `:` tokens consumed in this round.
            let mut nb = 0;
            while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
                self.tokens.next();
                nb += 1;
            }
            // Ident path can start with "::" but if we already have content in the ident path,
            // the "::" is mandatory.
            if has_ident && nb == 0 {
                return vec![(TokenKind::Ident, start, pos)];
            } else if nb != 0 && nb != 2 {
                // Anything other than exactly two colons is not a path separator: the colons
                // are returned as their own entry.
                if has_ident {
                    return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
                } else {
                    return vec![(TokenKind::Colon, start, pos + nb)];
                }
            }

            // Only identifiers for which `get_real_ident_class` returns no class are merged into
            // the path (presumably this leaves keywords out; confirm against that helper).
            if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
                if *token == TokenKind::Ident {
                    let class = get_real_ident_class(text, true);
                    (class, text)
                } else {
                    // Doesn't matter which Class we put in here...
                    (Some(Class::Comment), text)
                }
            }) {
                // We only "add" the colon if there is an ident behind.
                pos += text.len() + nb;
                has_ident = true;
                self.tokens.next();
            } else if nb > 0 && has_ident {
                return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
            } else if nb > 0 {
                return vec![(TokenKind::Colon, start, start + nb)];
            } else if has_ident {
                return vec![(TokenKind::Ident, start, pos)];
            } else {
                return Vec::new();
            }
        }
    }

    /// Pulls the next token while keeping `byte_pos` in sync with what has been consumed.
    ///
    /// On success, yields the kind of the token, its text and the byte position in the source
    /// string at which it starts. Yields `None` once the tokens are exhausted.
    fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
        let (kind, text) = self.tokens.next()?;
        let start = self.byte_pos;
        self.byte_pos = start + text.len() as u32;
        Some((kind, text, start))
    }

    /// Consumes the `Classifier`, reporting everything it finds to `sink`.
    ///
    /// Tokens are visited one after the other and each of them is possibly given a class
    /// describing what flavor of token it is. Before each step, the decorations whose start or
    /// end has been reached are opened or closed.
    fn highlight(mut self, sink: &mut dyn FnMut(Span, Highlight<'src>)) {
        loop {
            if let Some(decs) = self.decorations.as_mut() {
                let here = self.byte_pos;

                // Open every decoration starting at or before the current position...
                let opened = decs.starts.iter().filter(|&&(lo, _)| lo <= here).count();
                for (_, kind) in decs.starts.drain(..opened) {
                    sink(DUMMY_SP, Highlight::EnterSpan { class: Class::Decoration(kind) });
                }

                // ... then close every decoration ending at or before it.
                let closed = decs.ends.iter().filter(|&&hi| hi <= here).count();
                for _ in decs.ends.drain(..closed) {
                    sink(DUMMY_SP, Highlight::ExitSpan);
                }
            }

            // Colons and identifiers are first grouped into paths when possible.
            let starts_path =
                matches!(self.tokens.peek(), Some((TokenKind::Colon | TokenKind::Ident, _)));
            if starts_path {
                let pieces = self.get_full_ident_path();
                for &(kind, lo, hi) in &pieces {
                    let text = &self.src[lo..hi];
                    self.advance(kind, text, sink, lo as u32);
                    // `get_full_ident_path` consumed the tokens without moving `byte_pos`.
                    self.byte_pos += text.len() as u32;
                }
                if !pieces.is_empty() {
                    continue;
                }
            }

            let Some((kind, text, before)) = self.next() else { break };
            self.advance(kind, text, sink, before);
        }
    }

    /// Single step of highlighting. This will classify `token`, but maybe also a couple of
    /// following ones as well.
    ///
    /// `before` is the position of the given token in the `source` string and is used as "lo" byte
    /// in case we want to try to generate a link for this token using the
    /// `span_correspondence_map`.
    ///
    /// Everything is reported through `sink`. When `token` is merged with the token following it
    /// (`&&`, `==`, `->`, `*mut`, ...), the following token is consumed and the merged text is
    /// emitted with a dummy span.
    ///
    /// # Panics
    ///
    /// Panics if `token` is `TokenKind::Eof`.
    fn advance(
        &mut self,
        token: TokenKind,
        text: &'src str,
        sink: &mut dyn FnMut(Span, Highlight<'src>),
        before: u32,
    ) {
        // Kind of the token following `token`, if any.
        let lookahead = self.peek();
        let file_span = self.file_span;
        // Emits the whole token as is, without any class.
        let no_highlight = |sink: &mut dyn FnMut(_, _)| {
            sink(new_span(before, text, file_span), Highlight::Token { text, class: None })
        };
        // Emits the token without any class, split so that every line break is its own piece.
        let whitespace = |sink: &mut dyn FnMut(_, _)| {
            let mut start = 0u32;
            for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
                sink(
                    new_span(before + start, part, file_span),
                    Highlight::Token { text: part, class: None },
                );
                start += part.len() as u32;
            }
        };
        let class = match token {
            TokenKind::Whitespace => return whitespace(sink),
            TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
                if doc_style.is_some() {
                    Class::DocComment
                } else {
                    Class::Comment
                }
            }
            // Consider this as part of a macro invocation if there was a
            // leading identifier.
            TokenKind::Bang if self.in_macro => {
                self.in_macro = false;
                sink(new_span(before, text, file_span), Highlight::Token { text, class: None });
                sink(DUMMY_SP, Highlight::ExitSpan);
                return;
            }

            // Assume that '&' or '*' is the reference or dereference operator
            // or a reference or pointer type. Unless, of course, it looks like
            // a logical and or a multiplication operator: `&&` or `* `.
            TokenKind::Star => match self.tokens.peek() {
                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
                Some((TokenKind::Ident, "mut")) => {
                    self.next();
                    sink(
                        DUMMY_SP,
                        Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) },
                    );
                    return;
                }
                Some((TokenKind::Ident, "const")) => {
                    self.next();
                    sink(
                        DUMMY_SP,
                        Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) },
                    );
                    return;
                }
                _ => Class::RefKeyWord,
            },
            TokenKind::And => match self.tokens.peek() {
                Some((TokenKind::And, _)) => {
                    self.next();
                    sink(DUMMY_SP, Highlight::Token { text: "&&", class: None });
                    return;
                }
                Some((TokenKind::Eq, _)) => {
                    self.next();
                    sink(DUMMY_SP, Highlight::Token { text: "&=", class: None });
                    return;
                }
                Some((TokenKind::Whitespace, _)) => return whitespace(sink),
                Some((TokenKind::Ident, "mut")) => {
                    self.next();
                    sink(
                        DUMMY_SP,
                        Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) },
                    );
                    return;
                }
                _ => Class::RefKeyWord,
            },

            // These can either be operators, or arrows.
            TokenKind::Eq => match lookahead {
                Some(TokenKind::Eq) => {
                    self.next();
                    sink(DUMMY_SP, Highlight::Token { text: "==", class: None });
                    return;
                }
                Some(TokenKind::Gt) => {
                    self.next();
                    sink(DUMMY_SP, Highlight::Token { text: "=>", class: None });
                    return;
                }
                _ => return no_highlight(sink),
            },
            TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
                self.next();
                sink(DUMMY_SP, Highlight::Token { text: "->", class: None });
                return;
            }

            // Other operators.
            TokenKind::Minus
            | TokenKind::Plus
            | TokenKind::Or
            | TokenKind::Slash
            | TokenKind::Caret
            | TokenKind::Percent
            | TokenKind::Bang
            | TokenKind::Lt
            | TokenKind::Gt => return no_highlight(sink),

            // Miscellaneous, no highlighting.
            TokenKind::Dot
            | TokenKind::Semi
            | TokenKind::Comma
            | TokenKind::OpenParen
            | TokenKind::CloseParen
            | TokenKind::OpenBrace
            | TokenKind::CloseBrace
            | TokenKind::OpenBracket
            | TokenKind::At
            | TokenKind::Tilde
            | TokenKind::Colon
            | TokenKind::Frontmatter { .. }
            | TokenKind::Unknown => return no_highlight(sink),

            TokenKind::Question => Class::QuestionMark,

            // A `$` directly followed by an identifier starts a macro non-terminal: the flag
            // makes the identifier be classified the same way when its turn comes.
            TokenKind::Dollar => match lookahead {
                Some(TokenKind::Ident) => {
                    self.in_macro_nonterminal = true;
                    Class::MacroNonTerminal
                }
                _ => return no_highlight(sink),
            },

            // This might be the start of an attribute. We're going to want to
            // continue highlighting it as an attribute until the ending ']' is
            // seen, so skip out early. Down below we terminate the attribute
            // span when we see the ']'.
            TokenKind::Pound => {
                match lookahead {
                    // Case 1: #![inner_attribute]
                    Some(TokenKind::Bang) => {
                        // Consume the `!`; the attribute span is only opened if a `[` follows.
                        self.next();
                        if let Some(TokenKind::OpenBracket) = self.peek() {
                            self.in_attribute = true;
                            sink(
                                new_span(before, text, file_span),
                                Highlight::EnterSpan { class: Class::Attribute },
                            );
                        }
                        sink(DUMMY_SP, Highlight::Token { text: "#", class: None });
                        sink(DUMMY_SP, Highlight::Token { text: "!", class: None });
                        return;
                    }
                    // Case 2: #[outer_attribute]
                    Some(TokenKind::OpenBracket) => {
                        self.in_attribute = true;
                        sink(
                            new_span(before, text, file_span),
                            Highlight::EnterSpan { class: Class::Attribute },
                        );
                    }
                    _ => (),
                }
                return no_highlight(sink);
            }
            TokenKind::CloseBracket => {
                // The first `]` seen while inside an attribute closes the attribute span.
                if self.in_attribute {
                    self.in_attribute = false;
                    sink(
                        new_span(before, text, file_span),
                        Highlight::Token { text: "]", class: None },
                    );
                    sink(DUMMY_SP, Highlight::ExitSpan);
                    return;
                }
                return no_highlight(sink);
            }
            TokenKind::Literal { kind, .. } => match kind {
                // Text literals.
                LiteralKind::Byte { .. }
                | LiteralKind::Char { .. }
                | LiteralKind::Str { .. }
                | LiteralKind::ByteStr { .. }
                | LiteralKind::RawStr { .. }
                | LiteralKind::RawByteStr { .. }
                | LiteralKind::CStr { .. }
                | LiteralKind::RawCStr { .. } => Class::String,
                // Number literals.
                LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
            },
            TokenKind::GuardedStrPrefix => return no_highlight(sink),
            // An identifier directly followed by `!` opens a macro span, which is closed by the
            // `Bang` arm above once the `!` itself is processed.
            TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
                self.in_macro = true;
                let span = new_span(before, text, file_span);
                sink(DUMMY_SP, Highlight::EnterSpan { class: Class::Macro(span) });
                sink(span, Highlight::Token { text, class: None });
                return;
            }
            TokenKind::Ident => match get_real_ident_class(text, false) {
                None => match text {
                    "Option" | "Result" => Class::PreludeTy(new_span(before, text, file_span)),
                    "Some" | "None" | "Ok" | "Err" => {
                        Class::PreludeVal(new_span(before, text, file_span))
                    }
                    // "union" is a weak keyword and is only considered as a keyword when declaring
                    // a union type.
                    "union" if self.check_if_is_union_keyword() => Class::KeyWord,
                    _ if self.in_macro_nonterminal => {
                        self.in_macro_nonterminal = false;
                        Class::MacroNonTerminal
                    }
                    "self" | "Self" => Class::Self_(new_span(before, text, file_span)),
                    _ => Class::Ident(new_span(before, text, file_span)),
                },
                Some(c) => c,
            },
            TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
                Class::Ident(new_span(before, text, file_span))
            }
            TokenKind::Lifetime { .. }
            | TokenKind::RawLifetime
            | TokenKind::UnknownPrefixLifetime => Class::Lifetime,
            TokenKind::Eof => panic!("Eof in advance"),
        };
        // Anything that didn't return above is the simple case where the class just spans a
        // single token. The text is still split on line breaks so that every line, and every
        // line break, is emitted as its own token with that class.
        let mut start = 0u32;
        for part in text.split('\n').intersperse("\n").filter(|s| !s.is_empty()) {
            sink(
                new_span(before + start, part, file_span),
                Highlight::Token { text: part, class: Some(class) },
            );
            start += part.len() as u32;
        }
    }

    /// Returns the kind of the next token without consuming it, if there is one.
    fn peek(&mut self) -> Option<TokenKind> {
        let (kind, _) = self.tokens.peek()?;
        Some(*kind)
    }

    /// Looks ahead, without consuming anything, for the first token which is not whitespace and
    /// returns whether it is an identifier. Returns `false` if the tokens run out first.
    fn check_if_is_union_keyword(&mut self) -> bool {
        loop {
            match self.tokens.peek_next() {
                // Whitespace is skipped over.
                Some((TokenKind::Whitespace, _)) => continue,
                Some((kind, _)) => return *kind == TokenKind::Ident,
                None => return false,
            }
        }
    }
}

/// Opens a span of highlighted text: writes the opening tag for `klass` into `out` and returns
/// the closing tag which has to be written once the content of the span has been emitted.
///
/// # Panics
///
/// Panics if no closing tag is returned for `klass`.
fn enter_span(
    out: &mut impl Write,
    klass: Class,
    href_context: &Option<HrefContext<'_, '_>>,
) -> &'static str {
    // The opening tag is requested with an empty text, so only the tag itself is written.
    let closing_tag = string_without_closing_tag(out, "", Some(klass), href_context, true);
    closing_tag.expect(
        "internal error: enter_span was called with Some(klass) but did not return a \
            closing HTML tag",
    )
}

/// Ends a span of highlighted text by writing `closing_tag` into `out`.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    Write::write_str(out, closing_tag).unwrap();
}

/// Writes a piece of text into `out`. `klass` is something other than `None` when the text
/// should be highlighted differently from what surrounds it.
///
/// These two sequences of callbacks produce the same thing:
/// ```plain
///     enter_span(Foo), string("text", None), exit_span()
///     string("text", Foo)
/// ```
///
/// The second one is a shorthand for the first one, which is the more flexible of the two.
///
/// Two classes are handled apart: for `Class::Backline`, only `write_line_number_callback` is
/// called, and for `Class::Expansion`, the text is written as is.
///
/// Note that if `href_context` is not `None` and the given `klass` contains a `Span`, the
/// function will try to find this `span` in the `span_correspondence_map`. If found, a link to
/// the location of the definition is generated for this element.
fn string<W: Write>(
    out: &mut W,
    text: EscapeBodyText<'_>,
    klass: Option<Class>,
    href_context: &Option<HrefContext<'_, '_>>,
    open_tag: bool,
    write_line_number_callback: fn(&mut W, u32, &'static str),
) {
    match klass {
        Some(Class::Backline(line)) => write_line_number_callback(out, line, "\n"),
        // This has already been escaped so we get the text to write it directly.
        Some(Class::Expansion) => out.write_str(text.0).unwrap(),
        _ => {
            let closing_tag = string_without_closing_tag(out, text, klass, href_context, open_tag);
            if let Some(closing_tag) = closing_tag {
                out.write_str(closing_tag).unwrap();
            }
        }
    }
}

/// This function writes `text` into `out` with some modifications depending on `klass`:
///
/// * If `klass` is `None`, `text` is written into `out` with no modification.
/// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
///   `<span>` with the provided `klass`.
/// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
///   element) by retrieving the link information from the `span_correspondence_map` that was filled
///   in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
///   the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
///
/// When `open_tag` is `false`, no `<span>` is opened and no `class` attribute is written: only
/// the text, possibly wrapped in a link, is emitted.
///
/// Returns the closing tag the caller has to write after the content (it can be an empty
/// string), or `None` when only the text was written.
fn string_without_closing_tag<T: Display>(
    out: &mut impl Write,
    text: T,
    klass: Option<Class>,
    href_context: &Option<HrefContext<'_, '_>>,
    open_tag: bool,
) -> Option<&'static str> {
    let Some(klass) = klass else {
        write!(out, "{text}").unwrap();
        return None;
    };
    // Classes without a span can never be turned into a link.
    let Some(def_span) = klass.get_span() else {
        if !open_tag {
            write!(out, "{text}").unwrap();
            return None;
        }
        write!(out, "<span class=\"{klass}\">{text}", klass = klass.as_html()).unwrap();
        return Some("</span>");
    };

    // For paths, the `self`/`Self`/`crate`/`super` segments get their own `<span>` so that they
    // keep their class even though the path is emitted as a single element.
    let mut text_s = text.to_string();
    if text_s.contains("::") {
        text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
            match t {
                "self" | "Self" => write!(
                    &mut path,
                    "<span class=\"{klass}\">{t}</span>",
                    klass = Class::Self_(DUMMY_SP).as_html(),
                ),
                "crate" | "super" => {
                    write!(
                        &mut path,
                        "<span class=\"{klass}\">{t}</span>",
                        klass = Class::KeyWord.as_html(),
                    )
                }
                t => write!(&mut path, "{t}"),
            }
            .expect("Failed to build source HTML path");
            path
        });
    }

    // A link is only generated if there is an `href_context`, the span is known to the
    // `span_correspondence_map` and an URL could be computed for its target.
    if let Some(href_context) = href_context
        && let Some(href) = href_context.context.shared.span_correspondence_map.get(&def_span)
        && let Some(href) = {
            let context = href_context.context;
            // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
            // one to the documentation page and one to the source definition.
            // FIXME: currently, external items only generate a link to their documentation,
            // a link to their definition can be generated using this:
            // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
            match href {
                LinkFromSrc::Local(span) => {
                    context.href_from_span_relative(*span, &href_context.current_href)
                }
                LinkFromSrc::External(def_id) => {
                    format::href_with_root_path(*def_id, context, Some(href_context.root_path))
                        .ok()
                        .map(|(url, _, _)| url)
                }
                LinkFromSrc::Primitive(prim) => format::href_with_root_path(
                    PrimitiveType::primitive_locations(context.tcx())[prim],
                    context,
                    Some(href_context.root_path),
                )
                .ok()
                .map(|(url, _, _)| url),
                LinkFromSrc::Doc(def_id) => {
                    format::href_with_root_path(*def_id, context, Some(href_context.root_path))
                        .ok()
                        .map(|(doc_link, _, _)| doc_link)
                }
            }
        }
    {
        if !open_tag {
            // We're already inside an element which has the same klass, no need to give it
            // again.
            write!(out, "<a href=\"{href}\">{text_s}").unwrap();
        } else {
            let klass_s = klass.as_html();
            if klass_s.is_empty() {
                write!(out, "<a href=\"{href}\">{text_s}").unwrap();
            } else {
                write!(out, "<a class=\"{klass_s}\" href=\"{href}\">{text_s}").unwrap();
            }
        }
        return Some("</a>");
    }
    // No link could be generated: fall back on a plain `<span>` (or on the bare text).
    if !open_tag {
        out.write_str(&text_s).unwrap();
        return None;
    }
    let klass_s = klass.as_html();
    if klass_s.is_empty() {
        // Nothing was opened, yet the caller still gets a (empty) closing tag back.
        out.write_str(&text_s).unwrap();
        Some("")
    } else {
        write!(out, "<span class=\"{klass_s}\">{text_s}").unwrap();
        Some("</span>")
    }
}

#[cfg(test)]
mod tests;