diff options
| author | 许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com> | 2024-12-18 22:56:53 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-18 22:56:53 +0800 |
| commit | 0a2d708c31a66a094287fca1db7b2ad0f976e0a5 (patch) | |
| tree | fa58488c60e3e4884b702ed8be39c35129a45f9b /compiler | |
| parent | 477f222b02300761cea70ab4644275ce245a37c4 (diff) | |
| parent | 6de550cc68d5eb7dc5013987c87859e2f589ac12 (diff) | |
| download | rust-0a2d708c31a66a094287fca1db7b2ad0f976e0a5.tar.gz rust-0a2d708c31a66a094287fca1db7b2ad0f976e0a5.zip | |
Rollup merge of #134253 - nnethercote:overhaul-keywords, r=petrochenkov
Overhaul keyword handling The compiler's list of keywords has some problems. - It contains several items that aren't keywords. - The order isn't quite right in a couple of places. - Some of the names of predicates relating to keywords are confusing. - rustdoc and rustfmt have their own (incorrect) versions of the keyword list. - `AllKeywords` is unnecessarily complex. r? ```@jieyouxu```
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast/src/token.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/diagnostics.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 85 |
3 files changed, 55 insertions, 46 deletions
diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index ab82f18133e..f639e785bc4 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -903,7 +903,8 @@ impl Token { self.is_non_raw_ident_where(|id| id.name == kw) } - /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case. + /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this + /// token is an identifier equal to `kw` ignoring the case. pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool { self.is_keyword(kw) || (case == Case::Insensitive @@ -916,6 +917,11 @@ impl Token { self.is_non_raw_ident_where(Ident::is_path_segment_keyword) } + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(&self) -> bool { + self.is_non_raw_ident_where(Ident::is_any_keyword) + } + /// Returns true for reserved identifiers used internally for elided lifetimes, /// unnamed method parameters, crate root module, error recovery etc. pub fn is_special_ident(&self) -> bool { diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index d1a725e729a..8417701ac0c 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -22,7 +22,7 @@ use rustc_errors::{ use rustc_session::errors::ExprParenthesesNeeded; use rustc_span::edit_distance::find_best_match_for_name; use rustc_span::source_map::Spanned; -use rustc_span::symbol::AllKeywords; +use rustc_span::symbol::used_keywords; use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym}; use thin_vec::{ThinVec, thin_vec}; use tracing::{debug, trace}; @@ -811,12 +811,12 @@ impl<'a> Parser<'a> { // so that it gets generated only when the diagnostic needs it. // Also, it is unlikely that this list is generated multiple times because the // parser halts after execution hits this path. - let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition()); + let all_keywords = used_keywords(|| prev_ident.span.edition()); // Otherwise, check the previous token with all the keywords as possible candidates. // This handles code like `Struct Human;` and `While a < b {}`. - // We check the previous token only when the current token is an identifier to avoid false - // positives like suggesting keyword `for` for `extern crate foo {}`. + // We check the previous token only when the current token is an identifier to avoid + // false positives like suggesting keyword `for` for `extern crate foo {}`. if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) { err.subdiagnostic(misspelled_kw); // We don't want other suggestions to be added as they are most likely meaningless diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 7d99ca5a31e..a7ff0576f92 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -20,18 +20,26 @@ mod tests; // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`. symbols! { - // If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword` - // and `AllKeywords`. + // This list includes things that are definitely keywords (e.g. `if`), + // a few things that are definitely not keywords (e.g. the empty symbol, + // `{{root}}`) and things where there is disagreement between people and/or + // documents (such as the Rust Reference) about whether it is a keyword + // (e.g. `_`). + // + // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and + // `used_keywords`. // But this should rarely be necessary if the keywords are kept in alphabetic order. Keywords { // Special reserved identifiers used internally for elided lifetimes, // unnamed method parameters, crate root module, error recovery etc. + // Matching predicates: `is_any_keyword`, `is_special`/`is_reserved` Empty: "", PathRoot: "{{root}}", DollarCrate: "$crate", Underscore: "_", // Keywords that are used in stable Rust. + // Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved` As: "as", Break: "break", Const: "const", @@ -69,6 +77,7 @@ symbols! { While: "while", // Keywords that are used in unstable Rust or reserved for future use. + // Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved` Abstract: "abstract", Become: "become", Box: "box", @@ -83,23 +92,29 @@ symbols! { Yield: "yield", // Edition-specific keywords that are used in stable Rust. + // Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if + // the edition suffices) Async: "async", // >= 2018 Edition only Await: "await", // >= 2018 Edition only Dyn: "dyn", // >= 2018 Edition only // Edition-specific keywords that are used in unstable Rust or reserved for future use. + // Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if + // the edition suffices) + Gen: "gen", // >= 2024 Edition only Try: "try", // >= 2018 Edition only - // Special lifetime names + // "Lifetime keywords": regular keywords with a leading `'`. + // Matching predicates: `is_any_keyword` UnderscoreLifetime: "'_", StaticLifetime: "'static", // Weak keywords, have special meaning only in specific contexts. + // Matching predicates: `is_any_keyword` Auto: "auto", Builtin: "builtin", Catch: "catch", Default: "default", - Gen: "gen", MacroRules: "macro_rules", Raw: "raw", Reuse: "reuse", @@ -2589,6 +2604,11 @@ pub mod sym { } impl Symbol { + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(self) -> bool { + self >= kw::As && self <= kw::Yeet + } + fn is_special(self) -> bool { self <= kw::Underscore } @@ -2606,8 +2626,8 @@ impl Symbol { } fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool { - self == kw::Try && edition().at_least_rust_2018() - || self == kw::Gen && edition().at_least_rust_2024() + self == kw::Gen && edition().at_least_rust_2024() + || self == kw::Try && edition().at_least_rust_2018() } pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool { @@ -2645,6 +2665,11 @@ impl Symbol { } impl Ident { + /// Don't use this unless you're doing something very loose and heuristic-y. + pub fn is_any_keyword(self) -> bool { + self.name.is_any_keyword() + } + /// Returns `true` for reserved identifiers used internally for elided lifetimes, /// unnamed method parameters, crate root module, error recovery etc. pub fn is_special(self) -> bool { @@ -2683,41 +2708,19 @@ impl Ident { } } -/// An iterator over all the keywords in Rust. -#[derive(Copy, Clone)] -pub struct AllKeywords { - curr_idx: u32, - end_idx: u32, -} - -impl AllKeywords { - /// Initialize a new iterator over all the keywords. - /// - /// *Note:* Please update this if a new keyword is added beyond the current - /// range. - pub fn new() -> Self { - AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() } - } - - /// Collect all the keywords in a given edition into a vector. - pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> { - self.filter(|&keyword| { - keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition) +/// Collect all the keywords in a given edition into a vector. +/// +/// *Note:* Please update this if a new keyword is added beyond the current +/// range. +pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> { + (kw::Empty.as_u32()..kw::Yeet.as_u32()) + .filter_map(|kw| { + let kw = Symbol::new(kw); + if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) { + Some(kw) + } else { + None + } }) .collect() - } -} - -impl Iterator for AllKeywords { - type Item = Symbol; - - fn next(&mut self) -> Option<Self::Item> { - if self.curr_idx <= self.end_idx { - let keyword = Symbol::new(self.curr_idx); - self.curr_idx += 1; - Some(keyword) - } else { - None - } - } } |
