diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-04 15:55:06 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-19 16:05:41 +1100 |
| commit | b9bf0b4b10148aa914243a527d9010aba9b7b827 (patch) | |
| tree | 2478d44acd2d710543d168353b729e0716aefb70 /compiler/rustc_builtin_macros/src | |
| parent | d5370d981f58ebadf575f075a6f0d8c35bc704e8 (diff) | |
| download | rust-b9bf0b4b10148aa914243a527d9010aba9b7b827.tar.gz rust-b9bf0b4b10148aa914243a527d9010aba9b7b827.zip | |
Speed up `Parser::expected_token_types`.
The parser pushes a `TokenType` to `Parser::expected_token_types` on every call to the various `check`/`eat` methods, and clears it on every call to `bump`. Some of those `TokenType` values are full tokens that require cloning and dropping. This is a *lot* of work for something that is only used in error messages and it accounts for a significant fraction of parsing execution time. This commit overhauls `TokenType` so that `Parser::expected_token_types` can be implemented as a bitset. This requires changing `TokenType` to a C-style parameterless enum, and adding `TokenTypeSet` which uses a `u128` for the bits. (The new `TokenType` has 105 variants.) The new types `ExpTokenPair` and `ExpKeywordPair` are now arguments to the `check`/`eat` methods. This is for maximum speed. The elements in the pairs are always statically known; e.g. a `token::BinOp(token::Star)` is always paired with a `TokenType::Star`. So we now compute `TokenType`s in advance and pass them in to `check`/`eat` rather than the current approach of constructing them on insertion into `expected_token_types`. Values of these pair types can be produced by the new `exp!` macro, which is used at every `check`/`eat` call site. The macro is for convenience, allowing any pair to be generated from a single identifier. The ident/keyword filtering in `expected_one_of_not_found` is no longer necessary. It was there to account for some sloppiness in `TokenKind`/`TokenType` comparisons. The existing `TokenType` is moved to a new file `token_type.rs`, and all its new infrastructure is added to that file. There is more boilerplate code than I would like, but I can't see how to make it shorter.
Diffstat (limited to 'compiler/rustc_builtin_macros/src')
| -rw-r--r-- | compiler/rustc_builtin_macros/src/asm.rs | 112 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/assert.rs | 3 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/cfg.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/format.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/pattern_type.rs | 5 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/util.rs | 6 |
6 files changed, 71 insertions, 67 deletions
diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index cce70fb2ea4..6ae697d4030 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -1,16 +1,16 @@ use ast::token::IdentIsRaw; use lint::BuiltinLintDiag; -use rustc_ast::AsmMacro; use rustc_ast::ptr::P; -use rustc_ast::token::{self, Delimiter}; use rustc_ast::tokenstream::TokenStream; +use rustc_ast::{AsmMacro, token}; use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_errors::PResult; use rustc_expand::base::*; use rustc_index::bit_set::GrowableBitSet; -use rustc_parse::parser::Parser; +use rustc_parse::exp; +use rustc_parse::parser::{ExpKeywordPair, Parser}; use rustc_session::lint; -use rustc_span::{ErrorGuaranteed, Ident, InnerSpan, Span, Symbol, kw, sym}; +use rustc_span::{ErrorGuaranteed, Ident, InnerSpan, Span, Symbol, kw}; use rustc_target::asm::InlineAsmArch; use smallvec::smallvec; use {rustc_ast as ast, rustc_parse_format as parse}; @@ -38,16 +38,16 @@ pub struct AsmArgs { /// - `Err(_)` if the current token matches the keyword, but was not expected fn eat_operand_keyword<'a>( p: &mut Parser<'a>, - symbol: Symbol, + exp: ExpKeywordPair, asm_macro: AsmMacro, ) -> PResult<'a, bool> { if matches!(asm_macro, AsmMacro::Asm) { - Ok(p.eat_keyword(symbol)) + Ok(p.eat_keyword(exp)) } else { let span = p.token.span; - if p.eat_keyword_noexpect(symbol) { + if p.eat_keyword_noexpect(exp.kw) { // in gets printed as `r#in` otherwise - let symbol = if symbol == kw::In { "in" } else { symbol.as_str() }; + let symbol = if exp.kw == kw::In { "in" } else { exp.kw.as_str() }; Err(p.dcx().create_err(errors::AsmUnsupportedOperand { span, symbol, @@ -95,13 +95,13 @@ pub fn parse_asm_args<'a>( let mut allow_templates = true; while p.token != token::Eof { - if !p.eat(&token::Comma) { + if !p.eat(exp!(Comma)) { if allow_templates { // After a template string, we always expect *only* a comma... return Err(dcx.create_err(errors::AsmExpectedComma { span: p.token.span })); } else { // ...after that delegate to `expect` to also include the other expected tokens. - return Err(p.expect(&token::Comma).err().unwrap()); + return Err(p.expect(exp!(Comma)).err().unwrap()); } } if p.token == token::Eof { @@ -109,14 +109,14 @@ pub fn parse_asm_args<'a>( } // accept trailing commas // Parse clobber_abi - if p.eat_keyword(sym::clobber_abi) { + if p.eat_keyword(exp!(ClobberAbi)) { parse_clobber_abi(p, &mut args)?; allow_templates = false; continue; } // Parse options - if p.eat_keyword(sym::options) { + if p.eat_keyword(exp!(Options)) { parse_options(p, &mut args, asm_macro)?; allow_templates = false; continue; @@ -128,7 +128,7 @@ pub fn parse_asm_args<'a>( let name = if p.token.is_ident() && p.look_ahead(1, |t| *t == token::Eq) { let (ident, _) = p.token.ident().unwrap(); p.bump(); - p.expect(&token::Eq)?; + p.expect(exp!(Eq))?; allow_templates = false; Some(ident.name) } else { @@ -136,57 +136,57 @@ pub fn parse_asm_args<'a>( }; let mut explicit_reg = false; - let op = if eat_operand_keyword(p, kw::In, asm_macro)? { + let op = if eat_operand_keyword(p, exp!(In), asm_macro)? { let reg = parse_reg(p, &mut explicit_reg)?; - if p.eat_keyword(kw::Underscore) { + if p.eat_keyword(exp!(Underscore)) { let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span }); return Err(err); } let expr = p.parse_expr()?; ast::InlineAsmOperand::In { reg, expr } - } else if eat_operand_keyword(p, sym::out, asm_macro)? { + } else if eat_operand_keyword(p, exp!(Out), asm_macro)? { let reg = parse_reg(p, &mut explicit_reg)?; - let expr = if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) }; + let expr = if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) }; ast::InlineAsmOperand::Out { reg, expr, late: false } - } else if eat_operand_keyword(p, sym::lateout, asm_macro)? { + } else if eat_operand_keyword(p, exp!(Lateout), asm_macro)? { let reg = parse_reg(p, &mut explicit_reg)?; - let expr = if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) }; + let expr = if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) }; ast::InlineAsmOperand::Out { reg, expr, late: true } - } else if eat_operand_keyword(p, sym::inout, asm_macro)? { + } else if eat_operand_keyword(p, exp!(Inout), asm_macro)? { let reg = parse_reg(p, &mut explicit_reg)?; - if p.eat_keyword(kw::Underscore) { + if p.eat_keyword(exp!(Underscore)) { let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span }); return Err(err); } let expr = p.parse_expr()?; - if p.eat(&token::FatArrow) { + if p.eat(exp!(FatArrow)) { let out_expr = - if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) }; + if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) }; ast::InlineAsmOperand::SplitInOut { reg, in_expr: expr, out_expr, late: false } } else { ast::InlineAsmOperand::InOut { reg, expr, late: false } } - } else if eat_operand_keyword(p, sym::inlateout, asm_macro)? { + } else if eat_operand_keyword(p, exp!(Inlateout), asm_macro)? { let reg = parse_reg(p, &mut explicit_reg)?; - if p.eat_keyword(kw::Underscore) { + if p.eat_keyword(exp!(Underscore)) { let err = dcx.create_err(errors::AsmUnderscoreInput { span: p.token.span }); return Err(err); } let expr = p.parse_expr()?; - if p.eat(&token::FatArrow) { + if p.eat(exp!(FatArrow)) { let out_expr = - if p.eat_keyword(kw::Underscore) { None } else { Some(p.parse_expr()?) }; + if p.eat_keyword(exp!(Underscore)) { None } else { Some(p.parse_expr()?) }; ast::InlineAsmOperand::SplitInOut { reg, in_expr: expr, out_expr, late: true } } else { ast::InlineAsmOperand::InOut { reg, expr, late: true } } - } else if eat_operand_keyword(p, sym::label, asm_macro)? { + } else if eat_operand_keyword(p, exp!(Label), asm_macro)? { let block = p.parse_block()?; ast::InlineAsmOperand::Label { block } - } else if p.eat_keyword(kw::Const) { + } else if p.eat_keyword(exp!(Const)) { let anon_const = p.parse_expr_anon_const()?; ast::InlineAsmOperand::Const { anon_const } - } else if p.eat_keyword(sym::sym) { + } else if p.eat_keyword(exp!(Sym)) { let expr = p.parse_expr()?; let ast::ExprKind::Path(qself, path) = &expr.kind else { let err = dcx.create_err(errors::AsmSymNoPath { span: expr.span }); @@ -389,31 +389,31 @@ fn parse_options<'a>( ) -> PResult<'a, ()> { let span_start = p.prev_token.span; - p.expect(&token::OpenDelim(Delimiter::Parenthesis))?; - - while !p.eat(&token::CloseDelim(Delimiter::Parenthesis)) { - const OPTIONS: [(Symbol, ast::InlineAsmOptions); ast::InlineAsmOptions::COUNT] = [ - (sym::pure, ast::InlineAsmOptions::PURE), - (sym::nomem, ast::InlineAsmOptions::NOMEM), - (sym::readonly, ast::InlineAsmOptions::READONLY), - (sym::preserves_flags, ast::InlineAsmOptions::PRESERVES_FLAGS), - (sym::noreturn, ast::InlineAsmOptions::NORETURN), - (sym::nostack, ast::InlineAsmOptions::NOSTACK), - (sym::may_unwind, ast::InlineAsmOptions::MAY_UNWIND), - (sym::att_syntax, ast::InlineAsmOptions::ATT_SYNTAX), - (kw::Raw, ast::InlineAsmOptions::RAW), + p.expect(exp!(OpenParen))?; + + while !p.eat(exp!(CloseParen)) { + const OPTIONS: [(ExpKeywordPair, ast::InlineAsmOptions); ast::InlineAsmOptions::COUNT] = [ + (exp!(Pure), ast::InlineAsmOptions::PURE), + (exp!(Nomem), ast::InlineAsmOptions::NOMEM), + (exp!(Readonly), ast::InlineAsmOptions::READONLY), + (exp!(PreservesFlags), ast::InlineAsmOptions::PRESERVES_FLAGS), + (exp!(Noreturn), ast::InlineAsmOptions::NORETURN), + (exp!(Nostack), ast::InlineAsmOptions::NOSTACK), + (exp!(MayUnwind), ast::InlineAsmOptions::MAY_UNWIND), + (exp!(AttSyntax), ast::InlineAsmOptions::ATT_SYNTAX), + (exp!(Raw), ast::InlineAsmOptions::RAW), ]; 'blk: { - for (symbol, option) in OPTIONS { + for (exp, option) in OPTIONS { let kw_matched = if asm_macro.is_supported_option(option) { - p.eat_keyword(symbol) + p.eat_keyword(exp) } else { - p.eat_keyword_noexpect(symbol) + p.eat_keyword_noexpect(exp.kw) }; if kw_matched { - try_set_option(p, args, asm_macro, symbol, option); + try_set_option(p, args, asm_macro, exp.kw, option); break 'blk; } } @@ -422,10 +422,10 @@ fn parse_options<'a>( } // Allow trailing commas - if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) { + if p.eat(exp!(CloseParen)) { break; } - p.expect(&token::Comma)?; + p.expect(exp!(Comma))?; } let new_span = span_start.to(p.prev_token.span); @@ -437,14 +437,14 @@ fn parse_options<'a>( fn parse_clobber_abi<'a>(p: &mut Parser<'a>, args: &mut AsmArgs) -> PResult<'a, ()> { let span_start = p.prev_token.span; - p.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + p.expect(exp!(OpenParen))?; - if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) { + if p.eat(exp!(CloseParen)) { return Err(p.dcx().create_err(errors::NonABI { span: p.token.span })); } let mut new_abis = Vec::new(); - while !p.eat(&token::CloseDelim(Delimiter::Parenthesis)) { + while !p.eat(exp!(CloseParen)) { match p.parse_str_lit() { Ok(str_lit) => { new_abis.push((str_lit.symbol_unescaped, str_lit.span)); @@ -456,10 +456,10 @@ fn parse_clobber_abi<'a>(p: &mut Parser<'a>, args: &mut AsmArgs) -> PResult<'a, }; // Allow trailing commas - if p.eat(&token::CloseDelim(Delimiter::Parenthesis)) { + if p.eat(exp!(CloseParen)) { break; } - p.expect(&token::Comma)?; + p.expect(exp!(Comma))?; } let full_span = span_start.to(p.prev_token.span); @@ -482,7 +482,7 @@ fn parse_reg<'a>( p: &mut Parser<'a>, explicit_reg: &mut bool, ) -> PResult<'a, ast::InlineAsmRegOrRegClass> { - p.expect(&token::OpenDelim(Delimiter::Parenthesis))?; + p.expect(exp!(OpenParen))?; let result = match p.token.uninterpolate().kind { token::Ident(name, IdentIsRaw::No) => ast::InlineAsmRegOrRegClass::RegClass(name), token::Literal(token::Lit { kind: token::LitKind::Str, symbol, suffix: _ }) => { @@ -496,7 +496,7 @@ fn parse_reg<'a>( } }; p.bump(); - p.expect(&token::CloseDelim(Delimiter::Parenthesis))?; + p.expect(exp!(CloseParen))?; Ok(result) } diff --git a/compiler/rustc_builtin_macros/src/assert.rs b/compiler/rustc_builtin_macros/src/assert.rs index 95b31c7e47a..c659b1cff59 100644 --- a/compiler/rustc_builtin_macros/src/assert.rs +++ b/compiler/rustc_builtin_macros/src/assert.rs @@ -7,6 +7,7 @@ use rustc_ast::{DelimArgs, Expr, ExprKind, MacCall, Path, PathSegment, UnOp, tok use rustc_ast_pretty::pprust; use rustc_errors::PResult; use rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacEager, MacroExpanderResult}; +use rustc_parse::exp; use rustc_parse::parser::Parser; use rustc_span::{DUMMY_SP, Ident, Span, Symbol, sym}; use thin_vec::thin_vec; @@ -143,7 +144,7 @@ fn parse_assert<'a>(cx: &ExtCtxt<'a>, sp: Span, stream: TokenStream) -> PResult< cx.dcx().emit_err(errors::AssertMissingComma { span: parser.token.span, comma }); parse_custom_message(&mut parser) - } else if parser.eat(&token::Comma) { + } else if parser.eat(exp!(Comma)) { parse_custom_message(&mut parser) } else { None diff --git a/compiler/rustc_builtin_macros/src/cfg.rs b/compiler/rustc_builtin_macros/src/cfg.rs index 6e90f1682e3..85b8ef79c05 100644 --- a/compiler/rustc_builtin_macros/src/cfg.rs +++ b/compiler/rustc_builtin_macros/src/cfg.rs @@ -6,6 +6,7 @@ use rustc_ast::token; use rustc_ast::tokenstream::TokenStream; use rustc_errors::PResult; use rustc_expand::base::{DummyResult, ExpandResult, ExtCtxt, MacEager, MacroExpanderResult}; +use rustc_parse::exp; use rustc_span::Span; use {rustc_ast as ast, rustc_attr_parsing as attr}; @@ -48,9 +49,9 @@ fn parse_cfg<'a>( let cfg = p.parse_meta_item_inner()?; - let _ = p.eat(&token::Comma); + let _ = p.eat(exp!(Comma)); - if !p.eat(&token::Eof) { + if !p.eat(exp!(Eof)) { return Err(cx.dcx().create_err(errors::OneCfgPattern { span })); } diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index df3750049b7..528eb7725f5 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -12,6 +12,7 @@ use rustc_errors::{Applicability, Diag, MultiSpan, PResult, SingleLabelManySpans use rustc_expand::base::*; use rustc_lint_defs::builtin::NAMED_ARGUMENTS_USED_POSITIONALLY; use rustc_lint_defs::{BufferedEarlyLint, BuiltinLintDiag, LintId}; +use rustc_parse::exp; use rustc_parse_format as parse; use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol}; @@ -93,12 +94,12 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, let mut first = true; while p.token != token::Eof { - if !p.eat(&token::Comma) { + if !p.eat(exp!(Comma)) { if first { p.clear_expected_token_types(); } - match p.expect(&token::Comma) { + match p.expect(exp!(Comma)) { Err(err) => { match token::TokenKind::Comma.similar_tokens() { Some(tks) if tks.contains(&p.token.kind) => { @@ -122,7 +123,7 @@ fn parse_args<'a>(ecx: &ExtCtxt<'a>, sp: Span, tts: TokenStream) -> PResult<'a, match p.token.ident() { Some((ident, _)) if p.look_ahead(1, |t| *t == token::Eq) => { p.bump(); - p.expect(&token::Eq)?; + p.expect(exp!(Eq))?; let expr = p.parse_expr()?; if let Some((_, prev)) = args.by_name(ident.name) { ecx.dcx().emit_err(errors::FormatDuplicateArg { diff --git a/compiler/rustc_builtin_macros/src/pattern_type.rs b/compiler/rustc_builtin_macros/src/pattern_type.rs index 90b5f097b32..a600a9f316a 100644 --- a/compiler/rustc_builtin_macros/src/pattern_type.rs +++ b/compiler/rustc_builtin_macros/src/pattern_type.rs @@ -3,7 +3,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::{Pat, Ty, ast}; use rustc_errors::PResult; use rustc_expand::base::{self, DummyResult, ExpandResult, ExtCtxt, MacroExpanderResult}; -use rustc_span::{Span, sym}; +use rustc_parse::exp; +use rustc_span::Span; pub(crate) fn expand<'cx>( cx: &'cx mut ExtCtxt<'_>, @@ -24,7 +25,7 @@ fn parse_pat_ty<'a>(cx: &mut ExtCtxt<'a>, stream: TokenStream) -> PResult<'a, (P let mut parser = cx.new_parser_from_tts(stream); let ty = parser.parse_ty()?; - parser.expect_keyword(sym::is)?; + parser.expect_keyword(exp!(Is))?; let pat = parser.parse_pat_no_top_alt(None, None)?; Ok((ty, pat)) diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs index 2a28dfaf3c4..be12d21a800 100644 --- a/compiler/rustc_builtin_macros/src/util.rs +++ b/compiler/rustc_builtin_macros/src/util.rs @@ -7,7 +7,7 @@ use rustc_expand::expand::AstFragment; use rustc_feature::AttributeTemplate; use rustc_lint_defs::BuiltinLintDiag; use rustc_lint_defs::builtin::DUPLICATE_MACRO_ATTRIBUTES; -use rustc_parse::{parser, validate_attr}; +use rustc_parse::{exp, parser, validate_attr}; use rustc_session::errors::report_lit_error; use rustc_span::{BytePos, Span, Symbol}; @@ -204,7 +204,7 @@ pub(crate) fn get_single_expr_from_tts( Ok(ret) => ret, Err(guar) => return ExpandResult::Ready(Err(guar)), }; - let _ = p.eat(&token::Comma); + let _ = p.eat(exp!(Comma)); if p.token != token::Eof { cx.dcx().emit_err(errors::OnlyOneArgument { span, name }); @@ -237,7 +237,7 @@ pub(crate) fn get_exprs_from_tts( let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr(); es.push(expr); - if p.eat(&token::Comma) { + if p.eat(exp!(Comma)) { continue; } if p.token != token::Eof { |
