diff options
| author | Jonathan Brouwer <jonathantbrouwer@gmail.com> | 2025-07-31 15:37:43 +0200 |
|---|---|---|
| committer | Jonathan Brouwer <jonathantbrouwer@gmail.com> | 2025-08-22 08:38:37 +0200 |
| commit | 549314bdb7b7a3ac1313feea9283366f5115fca7 (patch) | |
| tree | 092c77eb92cc5442dcdc84d75e3d0a6ef9bb72c9 /compiler/rustc_attr_parsing/src | |
| parent | 21d31897794ed7fc7990de8e664d3c4ec511da7d (diff) | |
| download | rust-549314bdb7b7a3ac1313feea9283366f5115fca7.tar.gz rust-549314bdb7b7a3ac1313feea9283366f5115fca7.zip | |
Rewrite the new attribute parser
Diffstat (limited to 'compiler/rustc_attr_parsing/src')
| -rw-r--r-- | compiler/rustc_attr_parsing/src/parser.rs | 480 | ||||
| -rw-r--r-- | compiler/rustc_attr_parsing/src/session_diagnostics.rs | 36 |
2 files changed, 284 insertions, 232 deletions
diff --git a/compiler/rustc_attr_parsing/src/parser.rs b/compiler/rustc_attr_parsing/src/parser.rs index aecaae947c9..364f8819d13 100644 --- a/compiler/rustc_attr_parsing/src/parser.rs +++ b/compiler/rustc_attr_parsing/src/parser.rs @@ -3,45 +3,30 @@ //! //! FIXME(jdonszelmann): delete `rustc_ast/attr/mod.rs` +use std::borrow::Cow; use std::fmt::{Debug, Display}; -use std::iter::Peekable; -use rustc_ast::token::{self, Delimiter, Token}; -use rustc_ast::tokenstream::{TokenStreamIter, TokenTree}; +use rustc_ast::token::{self, Delimiter, MetaVarKind}; +use rustc_ast::tokenstream::TokenStream; use rustc_ast::{AttrArgs, DelimArgs, Expr, ExprKind, LitKind, MetaItemLit, NormalAttr, Path}; use rustc_ast_pretty::pprust; -use rustc_errors::DiagCtxtHandle; +use rustc_errors::PResult; use rustc_hir::{self as hir, AttrPath}; -use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; - -pub struct SegmentIterator<'a> { - offset: usize, - path: &'a PathParser<'a>, -} - -impl<'a> Iterator for SegmentIterator<'a> { - type Item = &'a Ident; - - fn next(&mut self) -> Option<Self::Item> { - if self.offset >= self.path.len() { - return None; - } - - let res = match self.path { - PathParser::Ast(ast_path) => &ast_path.segments[self.offset].ident, - PathParser::Attr(attr_path) => &attr_path.segments[self.offset], - }; - - self.offset += 1; - Some(res) - } -} +use rustc_parse::exp; +use rustc_parse::parser::{Parser, PathStyle, token_descr}; +use rustc_session::errors::report_lit_error; +use rustc_session::parse::ParseSess; +use rustc_span::{ErrorGuaranteed, Ident, Span, Symbol, sym}; +use thin_vec::ThinVec; + +use crate::ShouldEmit; +use crate::session_diagnostics::{ + InvalidMetaItem, InvalidMetaItemQuoteIdentSugg, InvalidMetaItemRemoveNegSugg, MetaBadDelim, + MetaBadDelimSugg, SuffixedLiteralInAttribute, +}; #[derive(Clone, Debug)] -pub enum PathParser<'a> { - Ast(&'a Path), - Attr(AttrPath), -} +pub struct PathParser<'a>(pub Cow<'a, Path>); impl<'a> PathParser<'a> { pub fn get_attribute_path(&self) -> hir::AttrPath { @@ -52,21 +37,15 @@ impl<'a> PathParser<'a> { } pub fn segments(&'a self) -> impl Iterator<Item = &'a Ident> { - SegmentIterator { offset: 0, path: self } + self.0.segments.iter().map(|seg| &seg.ident) } pub fn span(&self) -> Span { - match self { - PathParser::Ast(path) => path.span, - PathParser::Attr(attr_path) => attr_path.span, - } + self.0.span } pub fn len(&self) -> usize { - match self { - PathParser::Ast(path) => path.segments.len(), - PathParser::Attr(attr_path) => attr_path.segments.len(), - } + self.0.segments.len() } pub fn segments_is(&self, segments: &[Symbol]) -> bool { @@ -99,10 +78,7 @@ impl<'a> PathParser<'a> { impl Display for PathParser<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - PathParser::Ast(path) => write!(f, "{}", pprust::path_to_string(path)), - PathParser::Attr(attr_path) => write!(f, "{attr_path}"), - } + write!(f, "{}", pprust::path_to_string(&self.0)) } } @@ -123,21 +99,39 @@ impl<'a> ArgParser<'a> { } } - pub fn from_attr_args<'sess>(value: &'a AttrArgs, dcx: DiagCtxtHandle<'sess>) -> Self { - match value { + pub fn from_attr_args<'sess>( + value: &'a AttrArgs, + parts: &[Symbol], + psess: &'sess ParseSess, + should_emit: ShouldEmit, + ) -> Option<Self> { + Some(match value { AttrArgs::Empty => Self::NoArgs, - AttrArgs::Delimited(args) if args.delim == Delimiter::Parenthesis => { - Self::List(MetaItemListParser::new(args, dcx)) - } AttrArgs::Delimited(args) => { - Self::List(MetaItemListParser { sub_parsers: vec![], span: args.dspan.entire() }) + // The arguments of rustc_dummy are not validated if the arguments are delimited + if parts == &[sym::rustc_dummy] { + return Some(ArgParser::List(MetaItemListParser { + sub_parsers: ThinVec::new(), + span: args.dspan.entire(), + })); + } + + if args.delim != Delimiter::Parenthesis { + psess.dcx().emit_err(MetaBadDelim { + span: args.dspan.entire(), + sugg: MetaBadDelimSugg { open: args.dspan.open, close: args.dspan.close }, + }); + return None; + } + + Self::List(MetaItemListParser::new(args, psess, should_emit)?) } AttrArgs::Eq { eq_span, expr } => Self::NameValue(NameValueParser { eq_span: *eq_span, - value: expr_to_lit(dcx, &expr, *eq_span), + value: expr_to_lit(psess, &expr, expr.span, should_emit)?, value_span: expr.span, }), - } + }) } /// Asserts that this MetaItem is a list @@ -249,11 +243,16 @@ impl<'a> Debug for MetaItemParser<'a> { impl<'a> MetaItemParser<'a> { /// Create a new parser from a [`NormalAttr`], which is stored inside of any /// [`ast::Attribute`](rustc_ast::Attribute) - pub fn from_attr<'sess>(attr: &'a NormalAttr, dcx: DiagCtxtHandle<'sess>) -> Self { - Self { - path: PathParser::Ast(&attr.item.path), - args: ArgParser::from_attr_args(&attr.item.args, dcx), - } + pub fn from_attr<'sess>( + attr: &'a NormalAttr, + parts: &[Symbol], + psess: &'sess ParseSess, + should_emit: ShouldEmit, + ) -> Option<Self> { + Some(Self { + path: PathParser(Cow::Borrowed(&attr.item.path)), + args: ArgParser::from_attr_args(&attr.item.args, parts, psess, should_emit)?, + }) } } @@ -318,215 +317,232 @@ impl NameValueParser { } } -fn expr_to_lit(dcx: DiagCtxtHandle<'_>, expr: &Expr, span: Span) -> MetaItemLit { - // In valid code the value always ends up as a single literal. Otherwise, a dummy - // literal suffices because the error is handled elsewhere. - if let ExprKind::Lit(token_lit) = expr.kind - && let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span) - { - lit +fn expr_to_lit( + psess: &ParseSess, + expr: &Expr, + span: Span, + should_emit: ShouldEmit, +) -> Option<MetaItemLit> { + if let ExprKind::Lit(token_lit) = expr.kind { + let res = MetaItemLit::from_token_lit(token_lit, expr.span); + match res { + Ok(lit) => { + if token_lit.suffix.is_some() { + psess + .dcx() + .create_err(SuffixedLiteralInAttribute { span: lit.span }) + .emit_unless_delay(!should_emit.should_emit()); + None + } else { + if should_emit.should_emit() && !lit.kind.is_unsuffixed() { + // Emit error and continue, we can still parse the attribute as if the suffix isn't there + psess.dcx().emit_err(SuffixedLiteralInAttribute { span: lit.span }); + } + + Some(lit) + } + } + Err(err) => { + let guar = report_lit_error(psess, err, token_lit, expr.span); + let lit = MetaItemLit { + symbol: token_lit.symbol, + suffix: token_lit.suffix, + kind: LitKind::Err(guar), + span: expr.span, + }; + Some(lit) + } + } } else { - let guar = dcx.span_delayed_bug( - span, - "expr in place where literal is expected (builtin attr parsing)", - ); - MetaItemLit { symbol: sym::dummy, suffix: None, kind: LitKind::Err(guar), span } + // Example cases: + // - `#[foo = 1+1]`: results in `ast::ExprKind::BinOp`. + // - `#[foo = include_str!("nonexistent-file.rs")]`: + // results in `ast::ExprKind::Err`. In that case we delay + // the error because an earlier error will have already + // been reported. + let msg = "attribute value must be a literal"; + let mut err = psess.dcx().struct_span_err(span, msg); + if let ExprKind::Err(_) = expr.kind { + err.downgrade_to_delayed_bug(); + } + + err.emit_unless_delay(!should_emit.should_emit()); + None } } struct MetaItemListParserContext<'a, 'sess> { - // the tokens inside the delimiters, so `#[some::attr(a b c)]` would have `a b c` inside - inside_delimiters: Peekable<TokenStreamIter<'a>>, - dcx: DiagCtxtHandle<'sess>, + parser: &'a mut Parser<'sess>, + should_emit: ShouldEmit, } impl<'a, 'sess> MetaItemListParserContext<'a, 'sess> { - fn done(&mut self) -> bool { - self.inside_delimiters.peek().is_none() - } - - fn next_path(&mut self) -> Option<AttrPath> { - // FIXME: Share code with `parse_path`. - let tt = self.inside_delimiters.next().map(|tt| TokenTree::uninterpolate(tt)); - - match tt.as_deref()? { - &TokenTree::Token( - Token { kind: ref kind @ (token::Ident(..) | token::PathSep), span }, - _, - ) => { - // here we have either an ident or pathsep `::`. - - let mut segments = if let &token::Ident(name, _) = kind { - // when we lookahead another pathsep, more path's coming - if let Some(TokenTree::Token(Token { kind: token::PathSep, .. }, _)) = - self.inside_delimiters.peek() - { - self.inside_delimiters.next(); - vec![Ident::new(name, span)] - } else { - // else we have a single identifier path, that's all - return Some(AttrPath { - segments: vec![Ident::new(name, span)].into_boxed_slice(), - span, - }); - } - } else { - // if `::` is all we get, we just got a path root - vec![Ident::new(kw::PathRoot, span)] - }; + fn parse_unsuffixed_meta_item_lit(&mut self) -> PResult<'sess, MetaItemLit> { + let uninterpolated_span = self.parser.token_uninterpolated_span(); + let Some(token_lit) = self.parser.eat_token_lit() else { + return self.parser.handle_missing_lit(Parser::mk_meta_item_lit_char); + }; - // one segment accepted. accept n more - loop { - // another ident? - if let Some(&TokenTree::Token(Token { kind: token::Ident(name, _), span }, _)) = - self.inside_delimiters - .next() - .map(|tt| TokenTree::uninterpolate(tt)) - .as_deref() - { - segments.push(Ident::new(name, span)); - } else { - return None; - } - // stop unless we see another `::` - if let Some(TokenTree::Token(Token { kind: token::PathSep, .. }, _)) = - self.inside_delimiters.peek() - { - self.inside_delimiters.next(); - } else { - break; - } - } - let span = span.with_hi(segments.last().unwrap().span.hi()); - Some(AttrPath { segments: segments.into_boxed_slice(), span }) - } - TokenTree::Token(Token { kind, .. }, _) if kind.is_delim() => None, - _ => { - // malformed attributes can get here. We can't crash, but somewhere else should've - // already warned for this. - None + let lit = match MetaItemLit::from_token_lit(token_lit, self.parser.prev_token.span) { + Ok(lit) => lit, + Err(err) => { + let guar = + report_lit_error(&self.parser.psess, err, token_lit, uninterpolated_span); + // Pack possible quotes and prefixes from the original literal into + // the error literal's symbol so they can be pretty-printed faithfully. + let suffixless_lit = token::Lit::new(token_lit.kind, token_lit.symbol, None); + let symbol = Symbol::intern(&suffixless_lit.to_string()); + let token_lit = token::Lit::new(token::Err(guar), symbol, token_lit.suffix); + MetaItemLit::from_token_lit(token_lit, uninterpolated_span).unwrap() } + }; + + if self.should_emit.should_emit() && !lit.kind.is_unsuffixed() { + // Emit error and continue, we can still parse the attribute as if the suffix isn't there + self.parser.dcx().emit_err(SuffixedLiteralInAttribute { span: lit.span }); } - } - fn value(&mut self) -> Option<MetaItemLit> { - match self.inside_delimiters.next() { - Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => { - MetaItemListParserContext { - inside_delimiters: inner_tokens.iter().peekable(), - dcx: self.dcx, - } - .value() - } - Some(TokenTree::Token(token, _)) => MetaItemLit::from_token(token), - _ => None, + Ok(lit) + } + + fn parse_attr_item(&mut self) -> PResult<'sess, MetaItemParser<'static>> { + if let Some(MetaVarKind::Meta { has_meta_form }) = self.parser.token.is_metavar_seq() { + return if has_meta_form { + let attr_item = self + .parser + .eat_metavar_seq(MetaVarKind::Meta { has_meta_form: true }, |this| { + MetaItemListParserContext { parser: this, should_emit: self.should_emit } + .parse_attr_item() + }) + .unwrap(); + Ok(attr_item) + } else { + self.parser.unexpected_any() + }; } + + let path = self.parser.parse_path(PathStyle::Mod)?; + + // Check style of arguments that this meta item has + let args = if self.parser.check(exp!(OpenParen)) { + let start = self.parser.token.span; + let (sub_parsers, _) = self.parser.parse_paren_comma_seq(|parser| { + MetaItemListParserContext { parser, should_emit: self.should_emit } + .parse_meta_item_inner() + })?; + let end = self.parser.prev_token.span; + ArgParser::List(MetaItemListParser { sub_parsers, span: start.with_hi(end.hi()) }) + } else if self.parser.eat(exp!(Eq)) { + let eq_span = self.parser.prev_token.span; + let value = self.parse_unsuffixed_meta_item_lit()?; + + ArgParser::NameValue(NameValueParser { eq_span, value, value_span: value.span }) + } else { + ArgParser::NoArgs + }; + + Ok(MetaItemParser { path: PathParser(Cow::Owned(path)), args }) } - /// parses one element on the inside of a list attribute like `#[my_attr( <insides> )]` - /// - /// parses a path followed be either: - /// 1. nothing (a word attr) - /// 2. a parenthesized list - /// 3. an equals sign and a literal (name-value) - /// - /// Can also parse *just* a literal. This is for cases like as `#[my_attr("literal")]` - /// where no path is given before the literal - /// - /// Some exceptions too for interpolated attributes which are already pre-processed - fn next(&mut self) -> Option<MetaItemOrLitParser<'a>> { - // a list element is either a literal - if let Some(TokenTree::Token(token, _)) = self.inside_delimiters.peek() - && let Some(lit) = MetaItemLit::from_token(token) - { - self.inside_delimiters.next(); - return Some(MetaItemOrLitParser::Lit(lit)); - } else if let Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) = - self.inside_delimiters.peek() + fn parse_meta_item_inner(&mut self) -> PResult<'sess, MetaItemOrLitParser<'static>> { + match self.parse_unsuffixed_meta_item_lit() { + Ok(lit) => return Ok(MetaItemOrLitParser::Lit(lit)), + Err(err) => err.cancel(), // we provide a better error below + } + + match self.parse_attr_item() { + Ok(mi) => return Ok(MetaItemOrLitParser::MetaItemParser(mi)), + Err(err) => err.cancel(), // we provide a better error below + } + + let mut err = InvalidMetaItem { + span: self.parser.token.span, + descr: token_descr(&self.parser.token), + quote_ident_sugg: None, + remove_neg_sugg: None, + }; + + // Suggest quoting idents, e.g. in `#[cfg(key = value)]`. We don't use `Token::ident` and + // don't `uninterpolate` the token to avoid suggesting anything butchered or questionable + // when macro metavariables are involved. + if self.parser.prev_token == token::Eq + && let token::Ident(..) = self.parser.token.kind { - self.inside_delimiters.next(); - return MetaItemListParserContext { - inside_delimiters: inner_tokens.iter().peekable(), - dcx: self.dcx, + let before = self.parser.token.span.shrink_to_lo(); + while let token::Ident(..) = self.parser.token.kind { + self.parser.bump(); } - .next(); + err.quote_ident_sugg = Some(InvalidMetaItemQuoteIdentSugg { + before, + after: self.parser.prev_token.span.shrink_to_hi(), + }); } - // or a path. - let path = self.next_path()?; - - // Paths can be followed by: - // - `(more meta items)` (another list) - // - `= lit` (a name-value) - // - nothing - Some(MetaItemOrLitParser::MetaItemParser(match self.inside_delimiters.peek() { - Some(TokenTree::Delimited(dspan, _, Delimiter::Parenthesis, inner_tokens)) => { - self.inside_delimiters.next(); - - MetaItemParser { - path: PathParser::Attr(path), - args: ArgParser::List(MetaItemListParser::new_tts( - inner_tokens.iter(), - dspan.entire(), - self.dcx, - )), - } - } - Some(TokenTree::Delimited(_, ..)) => { - self.inside_delimiters.next(); - // self.dcx.span_delayed_bug(span.entire(), "wrong delimiters"); - return None; - } - Some(TokenTree::Token(Token { kind: token::Eq, span }, _)) => { - self.inside_delimiters.next(); - let value = self.value()?; - MetaItemParser { - path: PathParser::Attr(path), - args: ArgParser::NameValue(NameValueParser { - eq_span: *span, - value_span: value.span, - value, - }), - } - } - _ => MetaItemParser { path: PathParser::Attr(path), args: ArgParser::NoArgs }, - })) + if self.parser.token == token::Minus + && self + .parser + .look_ahead(1, |t| matches!(t.kind, rustc_ast::token::TokenKind::Literal { .. })) + { + err.remove_neg_sugg = + Some(InvalidMetaItemRemoveNegSugg { negative_sign: self.parser.token.span }); + self.parser.bump(); + self.parser.bump(); + } + + Err(self.parser.dcx().create_err(err)) } - fn parse(mut self, span: Span) -> MetaItemListParser<'a> { - let mut sub_parsers = Vec::new(); + fn parse( + tokens: TokenStream, + psess: &'sess ParseSess, + span: Span, + should_emit: ShouldEmit, + ) -> PResult<'sess, MetaItemListParser<'static>> { + let mut parser = Parser::new(psess, tokens, None); + let mut this = MetaItemListParserContext { parser: &mut parser, should_emit }; - while !self.done() { - let Some(n) = self.next() else { - continue; - }; - sub_parsers.push(n); + // Presumably, the majority of the time there will only be one attr. + let mut sub_parsers = ThinVec::with_capacity(1); + while this.parser.token != token::Eof { + sub_parsers.push(this.parse_meta_item_inner()?); - match self.inside_delimiters.peek() { - None | Some(TokenTree::Token(Token { kind: token::Comma, .. }, _)) => { - self.inside_delimiters.next(); - } - Some(_) => {} + if !this.parser.eat(exp!(Comma)) { + break; } } - MetaItemListParser { sub_parsers, span } + if parser.token != token::Eof { + parser.unexpected()?; + } + + Ok(MetaItemListParser { sub_parsers, span }) } } #[derive(Debug, Clone)] pub struct MetaItemListParser<'a> { - sub_parsers: Vec<MetaItemOrLitParser<'a>>, + sub_parsers: ThinVec<MetaItemOrLitParser<'a>>, pub span: Span, } impl<'a> MetaItemListParser<'a> { - fn new<'sess>(delim: &'a DelimArgs, dcx: DiagCtxtHandle<'sess>) -> Self { - MetaItemListParser::new_tts(delim.tokens.iter(), delim.dspan.entire(), dcx) - } - - fn new_tts<'sess>(tts: TokenStreamIter<'a>, span: Span, dcx: DiagCtxtHandle<'sess>) -> Self { - MetaItemListParserContext { inside_delimiters: tts.peekable(), dcx }.parse(span) + fn new<'sess>( + delim: &'a DelimArgs, + psess: &'sess ParseSess, + should_emit: ShouldEmit, + ) -> Option<Self> { + match MetaItemListParserContext::parse( + delim.tokens.clone(), + psess, + delim.dspan.entire(), + should_emit, + ) { + Ok(s) => Some(s), + Err(e) => { + e.emit_unless_delay(!should_emit.should_emit()); + None + } + } } /// Lets you pick and choose as what you want to parse each element in the list diff --git a/compiler/rustc_attr_parsing/src/session_diagnostics.rs b/compiler/rustc_attr_parsing/src/session_diagnostics.rs index b877caa143a..593affc0537 100644 --- a/compiler/rustc_attr_parsing/src/session_diagnostics.rs +++ b/compiler/rustc_attr_parsing/src/session_diagnostics.rs @@ -790,3 +790,39 @@ pub(crate) struct MetaBadDelimSugg { #[suggestion_part(code = ")")] pub close: Span, } + +#[derive(Diagnostic)] +#[diag(attr_parsing_invalid_meta_item)] +pub(crate) struct InvalidMetaItem { + #[primary_span] + pub span: Span, + pub descr: String, + #[subdiagnostic] + pub quote_ident_sugg: Option<InvalidMetaItemQuoteIdentSugg>, + #[subdiagnostic] + pub remove_neg_sugg: Option<InvalidMetaItemRemoveNegSugg>, +} + +#[derive(Subdiagnostic)] +#[multipart_suggestion(attr_parsing_quote_ident_sugg, applicability = "machine-applicable")] +pub(crate) struct InvalidMetaItemQuoteIdentSugg { + #[suggestion_part(code = "\"")] + pub before: Span, + #[suggestion_part(code = "\"")] + pub after: Span, +} + +#[derive(Subdiagnostic)] +#[multipart_suggestion(attr_parsing_remove_neg_sugg, applicability = "machine-applicable")] +pub(crate) struct InvalidMetaItemRemoveNegSugg { + #[suggestion_part(code = "")] + pub negative_sign: Span, +} + +#[derive(Diagnostic)] +#[diag(attr_parsing_suffixed_literal_in_attribute)] +#[help] +pub(crate) struct SuffixedLiteralInAttribute { + #[primary_span] + pub span: Span, +} |
