diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-04 15:55:06 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-19 16:05:41 +1100 |
| commit | b9bf0b4b10148aa914243a527d9010aba9b7b827 (patch) | |
| tree | 2478d44acd2d710543d168353b729e0716aefb70 /compiler/rustc_parse/src/parser/attr.rs | |
| parent | d5370d981f58ebadf575f075a6f0d8c35bc704e8 (diff) | |
| download | rust-b9bf0b4b10148aa914243a527d9010aba9b7b827.tar.gz rust-b9bf0b4b10148aa914243a527d9010aba9b7b827.zip | |
Speed up `Parser::expected_token_types`.
The parser pushes a `TokenType` to `Parser::expected_token_types` on every call to the various `check`/`eat` methods, and clears it on every call to `bump`. Some of those `TokenType` values are full tokens that require cloning and dropping. This is a *lot* of work for something that is only used in error messages and it accounts for a significant fraction of parsing execution time. This commit overhauls `TokenType` so that `Parser::expected_token_types` can be implemented as a bitset. This requires changing `TokenType` to a C-style parameterless enum, and adding `TokenTypeSet` which uses a `u128` for the bits. (The new `TokenType` has 105 variants.) The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to the `check`/`eat` methods. This is for maximum speed. The elements in the pairs are always statically known; e.g. a `token::BinOp(token::Star)` is always paired with a `TokenType::Star`. So we now compute `TokenType`s in advance and pass them in to `check`/`eat` rather than the previous approach of constructing them on insertion into `expected_token_types`. Values of these pair types can be produced by the new `exp!` macro, which is used at every `check`/`eat` call site. The macro is for convenience, allowing any pair to be generated from a single identifier. The ident/keyword filtering in `expected_one_of_not_found` is no longer necessary. It was there to account for some sloppiness in `TokenKind`/`TokenType` comparisons. The existing `TokenType` is moved to a new file `token_type.rs`, and all its new infrastructure is added to that file. There is more boilerplate code than I would like, but I can't see how to make it shorter.
Diffstat (limited to 'compiler/rustc_parse/src/parser/attr.rs')
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr.rs | 41 |
1 file changed, 20 insertions, 21 deletions
diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 9da4ab5a788..2691e6f56d6 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -1,8 +1,7 @@ -use rustc_ast::token::{self, Delimiter}; -use rustc_ast::{self as ast, Attribute, attr}; +use rustc_ast::{self as ast, Attribute, attr, token}; use rustc_errors::codes::*; use rustc_errors::{Diag, PResult}; -use rustc_span::{BytePos, Span, kw}; +use rustc_span::{BytePos, Span}; use thin_vec::ThinVec; use tracing::debug; @@ -10,7 +9,7 @@ use super::{ AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing, UsePreAttrPos, }; -use crate::{errors, fluent_generated as fluent, maybe_whole}; +use crate::{errors, exp, fluent_generated as fluent, maybe_whole}; // Public for rustfmt usage #[derive(Debug)] @@ -45,7 +44,7 @@ impl<'a> Parser<'a> { let mut just_parsed_doc_comment = false; let start_pos = self.num_bump_calls; loop { - let attr = if self.check(&token::Pound) { + let attr = if self.check(exp!(Pound)) { let prev_outer_attr_sp = outer_attrs.last().map(|attr: &Attribute| attr.span); let inner_error_reason = if just_parsed_doc_comment { @@ -126,14 +125,14 @@ impl<'a> Parser<'a> { let lo = self.token.span; // Attributes can't have attributes of their own [Editor's note: not with that attitude] self.collect_tokens_no_attrs(|this| { - assert!(this.eat(&token::Pound), "parse_attribute called in non-attribute position"); + assert!(this.eat(exp!(Pound)), "parse_attribute called in non-attribute position"); let style = - if this.eat(&token::Not) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer }; + if this.eat(exp!(Not)) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer }; - this.expect(&token::OpenDelim(Delimiter::Bracket))?; + this.expect(exp!(OpenBracket))?; let item = this.parse_attr_item(ForceCollect::No)?; - this.expect(&token::CloseDelim(Delimiter::Bracket))?; + 
this.expect(exp!(CloseBracket))?; let attr_sp = lo.to(this.prev_token.span); // Emit error if inner attribute is encountered and forbidden. @@ -274,10 +273,10 @@ impl<'a> Parser<'a> { // Attr items don't have attributes. self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| { - let is_unsafe = this.eat_keyword(kw::Unsafe); + let is_unsafe = this.eat_keyword(exp!(Unsafe)); let unsafety = if is_unsafe { let unsafe_span = this.prev_token.span; - this.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + this.expect(exp!(OpenParen))?; ast::Safety::Unsafe(unsafe_span) } else { ast::Safety::Default @@ -286,7 +285,7 @@ impl<'a> Parser<'a> { let path = this.parse_path(PathStyle::Mod)?; let args = this.parse_attr_args()?; if is_unsafe { - this.expect(&token::CloseDelim(Delimiter::Parenthesis))?; + this.expect(exp!(CloseParen))?; } Ok(( ast::AttrItem { unsafety, path, args, tokens: None }, @@ -306,7 +305,7 @@ impl<'a> Parser<'a> { loop { let start_pos = self.num_bump_calls; // Only try to parse if it is an inner attribute (has `!`). - let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) { + let attr = if self.check(exp!(Pound)) && self.look_ahead(1, |t| t == &token::Not) { Some(self.parse_attribute(InnerAttrPolicy::Permitted)?) } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind { if attr_style == ast::AttrStyle::Inner { @@ -358,7 +357,7 @@ impl<'a> Parser<'a> { &mut self, ) -> PResult<'a, (ast::MetaItemInner, Vec<(ast::AttrItem, Span)>)> { let cfg_predicate = self.parse_meta_item_inner()?; - self.expect(&token::Comma)?; + self.expect(exp!(Comma))?; // Presumably, the majority of the time there will only be one attr. 
let mut expanded_attrs = Vec::with_capacity(1); @@ -366,7 +365,7 @@ impl<'a> Parser<'a> { let lo = self.token.span; let item = self.parse_attr_item(ForceCollect::Yes)?; expanded_attrs.push((item, lo.to(self.prev_token.span))); - if !self.eat(&token::Comma) { + if !self.eat(exp!(Comma)) { break; } } @@ -380,7 +379,7 @@ impl<'a> Parser<'a> { let mut nmis = ThinVec::with_capacity(1); while self.token != token::Eof { nmis.push(self.parse_meta_item_inner()?); - if !self.eat(&token::Comma) { + if !self.eat(exp!(Comma)) { break; } } @@ -413,13 +412,13 @@ impl<'a> Parser<'a> { let lo = self.token.span; let is_unsafe = if unsafe_allowed == AllowLeadingUnsafe::Yes { - self.eat_keyword(kw::Unsafe) + self.eat_keyword(exp!(Unsafe)) } else { false }; let unsafety = if is_unsafe { let unsafe_span = self.prev_token.span; - self.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + self.expect(exp!(OpenParen))?; ast::Safety::Unsafe(unsafe_span) } else { @@ -429,7 +428,7 @@ impl<'a> Parser<'a> { let path = self.parse_path(PathStyle::Mod)?; let kind = self.parse_meta_item_kind()?; if is_unsafe { - self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; + self.expect(exp!(CloseParen))?; } let span = lo.to(self.prev_token.span); @@ -437,9 +436,9 @@ impl<'a> Parser<'a> { } pub(crate) fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> { - Ok(if self.eat(&token::Eq) { + Ok(if self.eat(exp!(Eq)) { ast::MetaItemKind::NameValue(self.parse_unsuffixed_meta_item_lit()?) - } else if self.check(&token::OpenDelim(Delimiter::Parenthesis)) { + } else if self.check(exp!(OpenParen)) { let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?; ast::MetaItemKind::List(list) } else { |
