about summary refs log tree commit diff
path: root/compiler/rustc_attr_parsing/src/parser.rs
diff options
context:
space:
mode:
authorJana Dönszelmann <jana@donsz.nl>2025-02-09 22:49:31 +0100
committerJana Dönszelmann <jana@donsz.nl>2025-02-24 14:26:06 +0100
commitdbd3b7928e91758296a0f6093d72f90214888133 (patch)
tree42d4c2e5310b11732c987657ed009ff375e1798d /compiler/rustc_attr_parsing/src/parser.rs
parent115b3b03b09cf71aca67c974ea0d4888c86b3012 (diff)
downloadrust-dbd3b7928e91758296a0f6093d72f90214888133.tar.gz
rust-dbd3b7928e91758296a0f6093d72f90214888133.zip
Introduce new parsing infrastructure and types for parsed attributes
fixup docs in parser
Diffstat (limited to 'compiler/rustc_attr_parsing/src/parser.rs')
-rw-r--r--compiler/rustc_attr_parsing/src/parser.rs625
1 files changed, 625 insertions, 0 deletions
diff --git a/compiler/rustc_attr_parsing/src/parser.rs b/compiler/rustc_attr_parsing/src/parser.rs
new file mode 100644
index 00000000000..74c28f10843
--- /dev/null
+++ b/compiler/rustc_attr_parsing/src/parser.rs
@@ -0,0 +1,625 @@
+//! This is in essence an (improved) duplicate of `rustc_ast/attr/mod.rs`.
+//! That module is intended to be deleted in its entirety.
+//!
+//! FIXME(jdonszelmann): delete `rustc_ast/attr/mod.rs`
+
+use std::fmt::{Debug, Display};
+use std::iter::Peekable;
+
+use rustc_ast::token::{self, Delimiter, Token};
+use rustc_ast::tokenstream::{TokenStreamIter, TokenTree};
+use rustc_ast::{AttrArgs, DelimArgs, Expr, ExprKind, LitKind, MetaItemLit, NormalAttr, Path};
+use rustc_ast_pretty::pprust;
+use rustc_errors::DiagCtxtHandle;
+use rustc_hir::{self as hir, AttrPath};
+use rustc_span::symbol::{Ident, kw};
+use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, Symbol};
+
+/// Iterator over the identifiers that make up a [`PathParser`], in order.
+///
+/// Created by [`PathParser::segments`].
+pub struct SegmentIterator<'a> {
+    /// Index of the segment the next call to `next` will look at.
+    offset: usize,
+    /// The path whose segments are being walked.
+    path: &'a PathParser<'a>,
+}
+
+impl<'a> Iterator for SegmentIterator<'a> {
+    type Item = &'a Ident;
+
+    /// Yields the segment at the current offset and advances past it, or returns `None`
+    /// (without advancing) once the offset is past the last segment.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Both flavours of path are indexable; they only differ in where the ident lives.
+        let ident = match self.path {
+            PathParser::Ast(ast_path) => {
+                ast_path.segments.get(self.offset).map(|segment| &segment.ident)
+            }
+            PathParser::Attr(attr_path) => attr_path.segments.get(self.offset),
+        }?;
+
+        self.offset += 1;
+        Some(ident)
+    }
+}
+
+/// The path part of an attribute or meta item, such as the `rustfmt::skip` in
+/// `#[rustfmt::skip]`.
+#[derive(Clone, Debug)]
+pub enum PathParser<'a> {
+    /// A path borrowed from the AST.
+    Ast(&'a Path),
+    /// An owned [`AttrPath`].
+    Attr(AttrPath),
+}
+
+impl<'a> PathParser<'a> {
+    /// Builds an owned [`hir::AttrPath`] containing a copy of every segment and the span of
+    /// this path.
+    pub fn get_attribute_path(&self) -> hir::AttrPath {
+        let segments = self.segments().copied().collect();
+        AttrPath { segments, span: self.span() }
+    }
+
+    /// Iterates over the identifiers of this path, starting with the first segment.
+    pub fn segments(&'a self) -> impl Iterator<Item = &'a Ident> {
+        SegmentIterator { path: self, offset: 0 }
+    }
+
+    /// The span stored on the underlying path.
+    pub fn span(&self) -> Span {
+        match *self {
+            PathParser::Ast(path) => path.span,
+            PathParser::Attr(ref attr_path) => attr_path.span,
+        }
+    }
+
+    /// The number of segments in this path.
+    pub fn len(&self) -> usize {
+        match self {
+            PathParser::Attr(attr_path) => attr_path.segments.len(),
+            PathParser::Ast(path) => path.segments.len(),
+        }
+    }
+
+    /// Whether the names of this path's segments are exactly `segments`, in the same order
+    /// and with the same length.
+    pub fn segments_is(&self, segments: &[Symbol]) -> bool {
+        self.segments().map(|ident| ident.name).eq(segments.iter().copied())
+    }
+
+    /// Returns the only identifier of a single-segment path, and `None` for paths of any
+    /// other length.
+    pub fn word(&self) -> Option<Ident> {
+        if self.len() != 1 {
+            return None;
+        }
+        self.segments().next().copied()
+    }
+
+    /// Like [`word`](Self::word), but yields [`Ident::empty`] instead of `None`.
+    pub fn word_or_empty(&self) -> Ident {
+        match self.word() {
+            Some(ident) => ident,
+            None => Ident::empty(),
+        }
+    }
+
+    /// Whether this path is a single segment named `sym`.
+    ///
+    /// See [`word`](Self::word) for what counts as a word.
+    pub fn word_is(&self, sym: Symbol) -> bool {
+        self.word().is_some_and(|ident| ident.name == sym)
+    }
+}
+
+impl Display for PathParser<'_> {
+    /// Writes the path as text: AST paths are rendered through `pprust::path_to_string`,
+    /// attribute paths through their own `Display` implementation.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PathParser::Attr(attr_path) => write!(f, "{attr_path}"),
+            PathParser::Ast(path) => f.write_str(&pprust::path_to_string(path)),
+        }
+    }
+}
+
+/// Whatever follows the path of an attribute or meta item.
+#[derive(Clone, Debug)]
+#[must_use]
+pub enum ArgParser<'a> {
+    /// Nothing follows the path, as in `#[inline]`.
+    NoArgs,
+    /// A delimited list follows the path, as in `#[allow(clippy::complexity)]`.
+    List(MetaItemListParser<'a>),
+    /// An `= value` follows the path, as in `#[doc = "hello"]`.
+    NameValue(NameValueParser),
+}
+
+impl<'a> ArgParser<'a> {
+    /// The span of the arguments, or `None` when there are none.
+    ///
+    /// For a name-value pair the span starts at the `=` and ends where the value ends.
+    pub fn span(&self) -> Option<Span> {
+        match self {
+            Self::NoArgs => None,
+            Self::List(list) => Some(list.span),
+            Self::NameValue(name_value) => {
+                let from_eq = name_value.value_span.with_lo(name_value.eq_span.lo());
+                Some(from_eq)
+            }
+        }
+    }
+
+    /// Builds the argument parser for the arguments of an AST attribute.
+    pub fn from_attr_args(value: &'a AttrArgs, dcx: DiagCtxtHandle<'a>) -> Self {
+        match value {
+            AttrArgs::Empty => Self::NoArgs,
+            AttrArgs::Delimited(args) => {
+                let list = if args.delim == Delimiter::Parenthesis {
+                    MetaItemListParser::new(args, dcx)
+                } else {
+                    // Only parenthesized lists have their contents parsed. Any other
+                    // delimiter produces an empty list that still carries the span of
+                    // the whole delimited group.
+                    MetaItemListParser { sub_parsers: vec![], span: args.dspan.entire() }
+                };
+                Self::List(list)
+            }
+            AttrArgs::Eq { eq_span, expr } => {
+                let value = expr_to_lit(dcx, expr);
+                Self::NameValue(NameValueParser { eq_span: *eq_span, value, value_span: expr.span })
+            }
+        }
+    }
+
+    /// Returns the list parser if these arguments are a list, and `None` otherwise.
+    ///
+    /// Some examples:
+    ///
+    /// - `#[allow(clippy::complexity)]`: `(clippy::complexity)` is a list
+    /// - `#[rustfmt::skip::macros(target_macro_name)]`: `(target_macro_name)` is a list
+    pub fn list(&self) -> Option<&MetaItemListParser<'a>> {
+        if let Self::List(list) = self { Some(list) } else { None }
+    }
+
+    /// Returns the name-value parser if these arguments are an `= value`, and `None`
+    /// otherwise.
+    ///
+    /// Some examples:
+    ///
+    /// - `#[clippy::cyclomatic_complexity = "100"]`: `clippy::cyclomatic_complexity = "100"`
+    ///   is a name-value pair whose name is a path (`clippy::cyclomatic_complexity`). The path
+    ///   was already inspected to obtain this `ArgParser`, so this method effectively only
+    ///   checks that the `= "100"` is present
+    /// - `#[doc = "hello"]`: `doc = "hello"` is also a name-value pair
+    pub fn name_value(&self) -> Option<&NameValueParser> {
+        if let Self::NameValue(name_value) = self { Some(name_value) } else { None }
+    }
+
+    /// Whether there are no arguments at all.
+    pub fn no_args(&self) -> bool {
+        match self {
+            Self::NoArgs => true,
+            Self::List(_) | Self::NameValue(_) => false,
+        }
+    }
+}
+
+/// One element of a list: either a literal, or a (possibly nested) meta item.
+///
+/// Use the accessor methods to pick the interpretation you expect.
+#[derive(Debug, Clone)]
+pub enum MetaItemOrLitParser<'a> {
+    /// The element is a meta item with its own path and arguments.
+    MetaItemParser(MetaItemParser<'a>),
+    /// The element is a bare literal.
+    Lit(MetaItemLit),
+    /// The element could not be parsed; holds its span and the error guarantee.
+    Err(Span, ErrorGuaranteed),
+}
+
+impl<'a> MetaItemOrLitParser<'a> {
+    /// The span of this element, whichever variant it is.
+    pub fn span(&self) -> Span {
+        match self {
+            Self::Err(span, _) => *span,
+            Self::Lit(lit) => lit.span,
+            Self::MetaItemParser(item) => item.span(),
+        }
+    }
+
+    /// Returns the literal if this element is one, and `None` otherwise.
+    pub fn lit(&self) -> Option<&MetaItemLit> {
+        let Self::Lit(lit) = self else {
+            return None;
+        };
+        Some(lit)
+    }
+
+    /// Returns the meta item parser if this element is a meta item, and `None` otherwise.
+    pub fn meta_item(&self) -> Option<&MetaItemParser<'a>> {
+        let Self::MetaItemParser(item) = self else {
+            return None;
+        };
+        Some(item)
+    }
+}
+
+/// Splits a MetaItem into the parts that attribute parsers care about.
+///
+/// The syntax of MetaItems is very permissive, while individual attributes usually accept
+/// only a narrow subset of it. This type lets an attribute pick the shape it expects.
+///
+/// A MetaItem is a path followed by (possibly empty) args:
+///
+/// - `name` -> there are no args
+/// - `name(...)` -> the args are a [`list`](ArgParser::list): the part between the parentheses
+/// - `name = value` -> the args are a [`name_value`](ArgParser::name_value): the `= value` part
+///
+/// The syntax of MetaItems can be found at <https://doc.rust-lang.org/reference/attributes.html>
+#[derive(Clone)]
+pub struct MetaItemParser<'a> {
+    /// The leading path of the meta item.
+    path: PathParser<'a>,
+    /// Whatever follows the path.
+    args: ArgParser<'a>,
+}
+
+impl Debug for MetaItemParser<'_> {
+    /// Formats the parser as a struct with its `path` and `args` fields.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut out = f.debug_struct("MetaItemParser");
+        out.field("path", &self.path);
+        out.field("args", &self.args);
+        out.finish()
+    }
+}
+
+impl<'a> MetaItemParser<'a> {
+    /// Create a new parser from a [`NormalAttr`], which is stored inside of any
+    /// [`ast::Attribute`](rustc_ast::Attribute)
+    pub fn from_attr(attr: &'a NormalAttr, dcx: DiagCtxtHandle<'a>) -> Self {
+        let item = &attr.item;
+        let path = PathParser::Ast(&item.path);
+        let args = ArgParser::from_attr_args(&item.args, dcx);
+        Self { path, args }
+    }
+}
+
+impl<'a> MetaItemParser<'a> {
+    /// The span of the whole meta item: the path, extended to the end of the args when there
+    /// are any.
+    pub fn span(&self) -> Span {
+        let path_span = self.path.span();
+        match self.args.span() {
+            Some(args_span) => path_span.with_hi(args_span.hi()),
+            None => path_span,
+        }
+    }
+
+    /// Gets just the path, without the args.
+    ///
+    /// This returns an owned copy of the path.
+    pub fn path_without_args(&self) -> PathParser<'a> {
+        self.path.clone()
+    }
+
+    /// Gets just the args parser, without caring about the path.
+    pub fn args(&self) -> &ArgParser<'a> {
+        &self.args
+    }
+
+    /// Returns both parts of the meta item: an owned copy of the path, and the args.
+    pub fn deconstruct(&self) -> (PathParser<'a>, &ArgParser<'a>) {
+        (self.path_without_args(), self.args())
+    }
+
+    /// Returns the path this MetaItem starts with, together with the args. Some examples:
+    ///
+    /// - `#[rustfmt::skip]`: `rustfmt::skip` is a path
+    /// - `#[allow(clippy::complexity)]`: `clippy::complexity` is a path
+    /// - `#[inline]`: `inline` is a single segment path
+    pub fn path(&self) -> (PathParser<'a>, &ArgParser<'a>) {
+        self.deconstruct()
+    }
+
+    /// Returns the word (single segment path) this MetaItem starts with, or `None` if the
+    /// path has any other number of segments. Doesn't return the args parser.
+    ///
+    /// For examples, see [`Self::word`]
+    pub fn word_without_args(&self) -> Option<Ident> {
+        // Inspect the path in place; going through `deconstruct` would clone it first.
+        self.path.word()
+    }
+
+    /// Like [`word_without_args`](Self::word_without_args), but returns an empty symbol
+    /// instead of None
+    pub fn word_or_empty_without_args(&self) -> Ident {
+        self.path.word_or_empty()
+    }
+
+    /// Returns the word (single segment path) this MetaItem starts with together with the
+    /// args, or `None` if the path has any other number of segments.
+    ///
+    /// Some examples:
+    /// - `#[inline]`: `inline` is a word
+    /// - `#[rustfmt::skip]`: `rustfmt::skip` is a path,
+    ///   and not a word and should instead be parsed using [`path`](Self::path)
+    pub fn word(&self) -> Option<(Ident, &ArgParser<'a>)> {
+        Some((self.path.word()?, &self.args))
+    }
+
+    /// Like [`word`](Self::word), but returns an empty symbol instead of None
+    pub fn word_or_empty(&self) -> (Ident, &ArgParser<'a>) {
+        (self.path.word_or_empty(), &self.args)
+    }
+
+    /// Returns the args if this MetaItem starts with the specific word `sym`, and `None`
+    /// otherwise.
+    ///
+    /// See [`word`](Self::word) for examples of what a word is.
+    pub fn word_is(&self, sym: Symbol) -> Option<&ArgParser<'a>> {
+        self.path.word_is(sym).then_some(&self.args)
+    }
+
+    /// Returns the args if this MetaItem starts with the specific path `segments`, and
+    /// `None` otherwise.
+    ///
+    /// See [`path`](Self::path) for examples of what a path is.
+    pub fn path_is(&self, segments: &[Symbol]) -> Option<&ArgParser<'a>> {
+        self.path.segments_is(segments).then_some(&self.args)
+    }
+}
+
+/// The `= value` part of a name-value meta item such as `#[doc = "hello"]`.
+#[derive(Clone)]
+pub struct NameValueParser {
+    /// Span of the `=` token.
+    pub eq_span: Span,
+    /// The value as a literal.
+    value: MetaItemLit,
+    /// Span of the value.
+    pub value_span: Span,
+}
+
+impl Debug for NameValueParser {
+    /// Formats the parser as a struct with its three fields.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut out = f.debug_struct("NameValueParser");
+        out.field("eq_span", &self.eq_span);
+        out.field("value", &self.value);
+        out.field("value_span", &self.value_span);
+        out.finish()
+    }
+}
+
+impl NameValueParser {
+    /// The value as a literal.
+    pub fn value_as_lit(&self) -> &MetaItemLit {
+        let Self { value, .. } = self;
+        value
+    }
+
+    /// The result of calling `str()` on the kind of the value literal.
+    pub fn value_as_str(&self) -> Option<Symbol> {
+        let lit = self.value_as_lit();
+        lit.kind.str()
+    }
+}
+
+/// Converts the expression on the right-hand side of a `name = value` attribute into a
+/// literal.
+///
+/// Returns the literal when `expr` is a literal expression that converts successfully.
+/// Otherwise returns a dummy literal of kind [`LitKind::Err`] with an empty symbol and a
+/// dummy span.
+fn expr_to_lit(dcx: DiagCtxtHandle<'_>, expr: &Expr) -> MetaItemLit {
+    // In valid code the value always ends up as a single literal. Otherwise, a dummy
+    // literal suffices because the error is handled elsewhere.
+    if let ExprKind::Lit(token_lit) = expr.kind
+        && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span)
+    {
+        return lit;
+    }
+
+    // Do not assume that an error has already been emitted by the time we get here:
+    // `has_errors().unwrap()` panics when it has not. A delayed bug hands out the
+    // `ErrorGuaranteed` we need without relying on that ordering.
+    let guar = dcx.span_delayed_bug(
+        expr.span,
+        "expr in place where literal is expected (builtin attr parsing)",
+    );
+    MetaItemLit { symbol: kw::Empty, suffix: None, kind: LitKind::Err(guar), span: DUMMY_SP }
+}
+
+/// State used while parsing the elements of a list attribute from its tokens.
+struct MetaItemListParserContext<'a> {
+    // The tokens inside the delimiters, so `#[some::attr(a b c)]` would have `a b c` inside.
+    inside_delimiters: Peekable<TokenStreamIter<'a>>,
+    // Used to report delayed bugs and passed on to nested parsers.
+    dcx: DiagCtxtHandle<'a>,
+}
+
+impl<'a> MetaItemListParserContext<'a> {
+    /// Whether there are no tokens left to look at.
+    fn done(&mut self) -> bool {
+        self.inside_delimiters.peek().is_none()
+    }
+
+    /// Tries to parse a path such as `a`, `a::b` or `::a::b` from the upcoming tokens.
+    ///
+    /// Always consumes at least one token if any are left. Returns `None` when the first
+    /// token is neither an identifier nor `::`, or when a `::` is not followed by an
+    /// identifier. Tokens consumed before such a failure are not put back.
+    fn next_path(&mut self) -> Option<AttrPath> {
+        // FIXME: Share code with `parse_path`.
+        let tt = self.inside_delimiters.next().map(|tt| TokenTree::uninterpolate(tt));
+
+        match tt.as_deref()? {
+            &TokenTree::Token(
+                Token { kind: ref kind @ (token::Ident(..) | token::PathSep), span },
+                _,
+            ) => {
+                // Here we have either an ident or a path separator (`::`).
+
+                let mut segments = if let &token::Ident(name, _) = kind {
+                    // If the lookahead is another path separator, more of the path is coming.
+                    if let Some(TokenTree::Token(Token { kind: token::PathSep, .. }, _)) =
+                        self.inside_delimiters.peek()
+                    {
+                        self.inside_delimiters.next();
+                        vec![Ident::new(name, span)]
+                    } else {
+                        // Otherwise this is a single-identifier path, and we are done.
+                        return Some(AttrPath {
+                            segments: vec![Ident::new(name, span)].into_boxed_slice(),
+                            span,
+                        });
+                    }
+                } else {
+                    // The path starts with `::`, so its first segment is the path root.
+                    vec![Ident::new(kw::PathRoot, span)]
+                };
+
+                // One segment and a `::` have been accepted. Accept one or more further
+                // segments.
+                loop {
+                    // A `::` must be followed by an ident; anything else fails the parse.
+                    if let Some(&TokenTree::Token(Token { kind: token::Ident(name, _), span }, _)) =
+                        self.inside_delimiters
+                            .next()
+                            .map(|tt| TokenTree::uninterpolate(tt))
+                            .as_deref()
+                    {
+                        segments.push(Ident::new(name, span));
+                    } else {
+                        return None;
+                    }
+                    // Stop unless we see another `::`.
+                    if let Some(TokenTree::Token(Token { kind: token::PathSep, .. }, _)) =
+                        self.inside_delimiters.peek()
+                    {
+                        self.inside_delimiters.next();
+                    } else {
+                        break;
+                    }
+                }
+                // The path's span runs from the start of the first token to the end of the
+                // last segment.
+                let span = span.with_hi(segments.last().unwrap().span.hi());
+                Some(AttrPath { segments: segments.into_boxed_slice(), span })
+            }
+            TokenTree::Token(
+                Token { kind: token::OpenDelim(_) | token::CloseDelim(_), .. },
+                _,
+            ) => None,
+            _ => {
+                // Malformed attributes can get here. We can't crash, but somewhere else should
+                // have already warned about this.
+                None
+            }
+        }
+    }
+
+    /// Consumes the next token tree and tries to read it as a literal.
+    ///
+    /// For a group with invisible delimiters, the literal is read from the first token tree
+    /// inside the group instead. Returns `None` if there is nothing left or no literal could
+    /// be produced.
+    fn value(&mut self) -> Option<MetaItemLit> {
+        match self.inside_delimiters.next() {
+            Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => {
+                MetaItemListParserContext {
+                    inside_delimiters: inner_tokens.iter().peekable(),
+                    dcx: self.dcx,
+                }
+                .value()
+            }
+            Some(TokenTree::Token(token, _)) => MetaItemLit::from_token(token),
+            _ => None,
+        }
+    }
+
+    /// Parses one element on the inside of a list attribute like `#[my_attr( <insides> )]`.
+    ///
+    /// Parses a path followed by either:
+    /// 1. nothing (a word attr)
+    /// 2. a parenthesized list
+    /// 3. an equals sign and a literal (name-value)
+    ///
+    /// Can also parse *just* a literal. This is for cases like `#[my_attr("literal")]`
+    /// where no path is given before the literal.
+    ///
+    /// There are also some exceptions for interpolated attributes, which are already
+    /// pre-processed.
+    ///
+    /// Returns `None` when no element could be parsed. Tokens consumed during a failed
+    /// attempt are not put back.
+    fn next(&mut self) -> Option<MetaItemOrLitParser<'a>> {
+        // A list element is either a literal...
+        if let Some(TokenTree::Token(token, _)) = self.inside_delimiters.peek()
+            && let Some(lit) = MetaItemLit::from_token(token)
+        {
+            self.inside_delimiters.next();
+            return Some(MetaItemOrLitParser::Lit(lit));
+        }
+
+        // ...or something that starts with a path.
+        let path =
+            if let Some(TokenTree::Token(Token { kind: token::Interpolated(nt), span, .. }, _)) =
+                self.inside_delimiters.peek()
+            {
+                match &**nt {
+                    // An interpolated meta item already contains both the path and the args,
+                    // so it is returned immediately.
+                    token::Nonterminal::NtMeta(item) => {
+                        self.inside_delimiters.next();
+
+                        return Some(MetaItemOrLitParser::MetaItemParser(MetaItemParser {
+                            path: PathParser::Ast(&item.path),
+                            args: ArgParser::from_attr_args(&item.args, self.dcx),
+                        }));
+                    }
+                    // A path that was already interpolated by macro expansion is used as is;
+                    // there is no need to parse one from tokens.
+                    token::Nonterminal::NtPath(path) => {
+                        self.inside_delimiters.next();
+
+                        AttrPath::from_ast(path)
+                    }
+                    _ => {
+                        self.inside_delimiters.next();
+                        // We get here if an expr ended up in an attribute and expansion did
+                        // not turn it into a literal. Say, `#[repr(align(macro!()))]` where
+                        // the macro didn't expand to a literal. An error has already been
+                        // given for this at this point, but compilation continues, which is
+                        // what makes this branch reachable.
+                        let e = self.dcx.span_delayed_bug(
+                            *span,
+                            "expr in place where literal is expected (builtin attr parsing)",
+                        );
+
+                        return Some(MetaItemOrLitParser::Err(*span, e));
+                    }
+                }
+            } else {
+                self.next_path()?
+            };
+
+        // Paths can be followed by:
+        // - `(more meta items)` (another list)
+        // - `= lit` (a name-value)
+        // - nothing
+        Some(MetaItemOrLitParser::MetaItemParser(match self.inside_delimiters.peek() {
+            Some(TokenTree::Delimited(dspan, _, Delimiter::Parenthesis, inner_tokens)) => {
+                self.inside_delimiters.next();
+
+                MetaItemParser {
+                    path: PathParser::Attr(path),
+                    args: ArgParser::List(MetaItemListParser::new_tts(
+                        inner_tokens.iter(),
+                        dspan.entire(),
+                        self.dcx,
+                    )),
+                }
+            }
+            Some(TokenTree::Delimited(_, ..)) => {
+                self.inside_delimiters.next();
+                // Groups with any other delimiter are not accepted after a path: the group is
+                // skipped and no element is produced. No diagnostic is emitted here.
+                return None;
+            }
+            Some(TokenTree::Token(Token { kind: token::Eq, span }, _)) => {
+                self.inside_delimiters.next();
+                let value = self.value()?;
+                MetaItemParser {
+                    path: PathParser::Attr(path),
+                    args: ArgParser::NameValue(NameValueParser {
+                        eq_span: *span,
+                        value_span: value.span,
+                        value,
+                    }),
+                }
+            }
+            _ => MetaItemParser { path: PathParser::Attr(path), args: ArgParser::NoArgs },
+        }))
+    }
+
+    /// Parses all remaining tokens into a [`MetaItemListParser`] carrying the given `span`.
+    ///
+    /// Elements that fail to parse are left out of the result. A comma directly after a
+    /// successfully parsed element is consumed.
+    fn parse(mut self, span: Span) -> MetaItemListParser<'a> {
+        let mut sub_parsers = Vec::new();
+
+        while !self.done() {
+            // A failed attempt has already consumed the offending token(s), so simply try
+            // again from the current position.
+            let Some(n) = self.next() else {
+                continue;
+            };
+            sub_parsers.push(n);
+
+            match self.inside_delimiters.peek() {
+                // Skip the separating comma. At the end of the tokens, this `next` call
+                // yields nothing.
+                None | Some(TokenTree::Token(Token { kind: token::Comma, .. }, _)) => {
+                    self.inside_delimiters.next();
+                }
+                // Anything else is left in place for the next iteration.
+                Some(_) => {}
+            }
+        }
+
+        MetaItemListParser { sub_parsers, span }
+    }
+}
+
+/// The parsed elements of a list such as the `(a, b = "c")` in `#[attr(a, b = "c")]`.
+#[derive(Debug, Clone)]
+pub struct MetaItemListParser<'a> {
+    /// One parser per element of the list, in order.
+    sub_parsers: Vec<MetaItemOrLitParser<'a>>,
+    /// The span given to the list when it was created.
+    pub span: Span,
+}
+
+impl<'a> MetaItemListParser<'a> {
+    /// Parses the tokens of delimited attribute args, using the span of the whole
+    /// delimited group as the list's span.
+    fn new(delim: &'a DelimArgs, dcx: DiagCtxtHandle<'a>) -> MetaItemListParser<'a> {
+        let tokens = delim.tokens.iter();
+        let span = delim.dspan.entire();
+        Self::new_tts(tokens, span, dcx)
+    }
+
+    /// Parses the given tokens into a list with the given span.
+    fn new_tts(tts: TokenStreamIter<'a>, span: Span, dcx: DiagCtxtHandle<'a>) -> Self {
+        let context = MetaItemListParserContext { inside_delimiters: tts.peekable(), dcx };
+        context.parse(span)
+    }
+
+    /// Iterates over the elements, leaving it to the caller to decide how to interpret
+    /// each one.
+    pub fn mixed<'s>(&'s self) -> impl Iterator<Item = &'s MetaItemOrLitParser<'a>> + 's {
+        self.sub_parsers.iter()
+    }
+
+    /// The number of elements in the list.
+    pub fn len(&self) -> usize {
+        self.sub_parsers.len()
+    }
+
+    /// Whether the list has no elements.
+    pub fn is_empty(&self) -> bool {
+        self.sub_parsers.is_empty()
+    }
+
+    /// Returns the word and args of every element, or `None` if any element is not a meta
+    /// item starting with a word.
+    ///
+    /// See [`MetaItemParser::word`] for examples of words.
+    pub fn all_word_list<'s>(&'s self) -> Option<Vec<(Ident, &'s ArgParser<'a>)>> {
+        self.mixed().map(|element| element.meta_item().and_then(|item| item.word())).collect()
+    }
+
+    /// Returns the path and args of every element, or `None` if any element is not a meta
+    /// item.
+    ///
+    /// See [`MetaItemParser::path`] for examples of paths.
+    pub fn all_path_list<'s>(&'s self) -> Option<Vec<(PathParser<'a>, &'s ArgParser<'a>)>> {
+        self.mixed().map(|element| element.meta_item().map(|item| item.path())).collect()
+    }
+
+    /// Returns Some if the list contains exactly one element.
+    ///
+    /// Inside the Some is the parser for that single element.
+    pub fn single(&self) -> Option<&MetaItemOrLitParser<'a>> {
+        match self.sub_parsers.as_slice() {
+            [only] => Some(only),
+            _ => None,
+        }
+    }
+}