about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
author: 许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com>, 2024-12-18 22:56:53 +0800
committer: GitHub <noreply@github.com>, 2024-12-18 22:56:53 +0800
commit: 0a2d708c31a66a094287fca1db7b2ad0f976e0a5 (patch)
tree: fa58488c60e3e4884b702ed8be39c35129a45f9b /compiler
parent: 477f222b02300761cea70ab4644275ce245a37c4 (diff)
parent: 6de550cc68d5eb7dc5013987c87859e2f589ac12 (diff)
download: rust-0a2d708c31a66a094287fca1db7b2ad0f976e0a5.tar.gz
download: rust-0a2d708c31a66a094287fca1db7b2ad0f976e0a5.zip
Rollup merge of #134253 - nnethercote:overhaul-keywords, r=petrochenkov
Overhaul keyword handling

The compiler's list of keywords has some problems.
- It contains several items that aren't keywords.
- The order isn't quite right in a couple of places.
- Some of the names of predicates relating to keywords are confusing.
- rustdoc and rustfmt have their own (incorrect) versions of the keyword list.
- `AllKeywords` is unnecessarily complex.

r? ```@jieyouxu```
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/rustc_ast/src/token.rs 8
-rw-r--r-- compiler/rustc_parse/src/parser/diagnostics.rs 8
-rw-r--r-- compiler/rustc_span/src/symbol.rs 85
3 files changed, 55 insertions, 46 deletions
diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs
index ab82f18133e..f639e785bc4 100644
--- a/compiler/rustc_ast/src/token.rs
+++ b/compiler/rustc_ast/src/token.rs
@@ -903,7 +903,8 @@ impl Token {
         self.is_non_raw_ident_where(|id| id.name == kw)
     }
 
-    /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this token is an identifier equal to `kw` ignoring the case.
+    /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
+    /// token is an identifier equal to `kw` ignoring the case.
     pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
         self.is_keyword(kw)
             || (case == Case::Insensitive
@@ -916,6 +917,11 @@ impl Token {
         self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
     }
 
+    /// Don't use this unless you're doing something very loose and heuristic-y.
+    pub fn is_any_keyword(&self) -> bool {
+        self.is_non_raw_ident_where(Ident::is_any_keyword)
+    }
+
     /// Returns true for reserved identifiers used internally for elided lifetimes,
     /// unnamed method parameters, crate root module, error recovery etc.
     pub fn is_special_ident(&self) -> bool {
diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs
index d1a725e729a..8417701ac0c 100644
--- a/compiler/rustc_parse/src/parser/diagnostics.rs
+++ b/compiler/rustc_parse/src/parser/diagnostics.rs
@@ -22,7 +22,7 @@ use rustc_errors::{
 use rustc_session::errors::ExprParenthesesNeeded;
 use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::source_map::Spanned;
-use rustc_span::symbol::AllKeywords;
+use rustc_span::symbol::used_keywords;
 use rustc_span::{BytePos, DUMMY_SP, Ident, Span, SpanSnippetError, Symbol, kw, sym};
 use thin_vec::{ThinVec, thin_vec};
 use tracing::{debug, trace};
@@ -811,12 +811,12 @@ impl<'a> Parser<'a> {
             // so that it gets generated only when the diagnostic needs it.
             // Also, it is unlikely that this list is generated multiple times because the
             // parser halts after execution hits this path.
-            let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());
+            let all_keywords = used_keywords(|| prev_ident.span.edition());
 
             // Otherwise, check the previous token with all the keywords as possible candidates.
             // This handles code like `Struct Human;` and `While a < b {}`.
-            // We check the previous token only when the current token is an identifier to avoid false
-            // positives like suggesting keyword `for` for `extern crate foo {}`.
+            // We check the previous token only when the current token is an identifier to avoid
+            // false positives like suggesting keyword `for` for `extern crate foo {}`.
             if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
                 err.subdiagnostic(misspelled_kw);
                 // We don't want other suggestions to be added as they are most likely meaningless
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 7d99ca5a31e..a7ff0576f92 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -20,18 +20,26 @@ mod tests;
 
 // The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
 symbols! {
-    // If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
-    // and `AllKeywords`.
+    // This list includes things that are definitely keywords (e.g. `if`),
+    // a few things that are definitely not keywords (e.g. the empty symbol,
+    // `{{root}}`) and things where there is disagreement between people and/or
+    // documents (such as the Rust Reference) about whether it is a keyword
+    // (e.g. `_`).
+    //
+    // If you modify this list, adjust any relevant `Symbol::{is,can_be}_*` predicates and
+    // `used_keywords`.
     // But this should rarely be necessary if the keywords are kept in alphabetic order.
     Keywords {
         // Special reserved identifiers used internally for elided lifetimes,
         // unnamed method parameters, crate root module, error recovery etc.
+        // Matching predicates: `is_any_keyword`, `is_special`/`is_reserved`
         Empty:              "",
         PathRoot:           "{{root}}",
         DollarCrate:        "$crate",
         Underscore:         "_",
 
         // Keywords that are used in stable Rust.
+        // Matching predicates: `is_any_keyword`, `is_used_keyword_always`/`is_reserved`
         As:                 "as",
         Break:              "break",
         Const:              "const",
@@ -69,6 +77,7 @@ symbols! {
         While:              "while",
 
         // Keywords that are used in unstable Rust or reserved for future use.
+        // Matching predicates: `is_any_keyword`, `is_unused_keyword_always`/`is_reserved`
         Abstract:           "abstract",
         Become:             "become",
         Box:                "box",
@@ -83,23 +92,29 @@ symbols! {
         Yield:              "yield",
 
         // Edition-specific keywords that are used in stable Rust.
+        // Matching predicates: `is_any_keyword`, `is_used_keyword_conditional`/`is_reserved` (if
+        // the edition suffices)
         Async:              "async", // >= 2018 Edition only
         Await:              "await", // >= 2018 Edition only
         Dyn:                "dyn", // >= 2018 Edition only
 
         // Edition-specific keywords that are used in unstable Rust or reserved for future use.
+        // Matching predicates: `is_any_keyword`, `is_unused_keyword_conditional`/`is_reserved` (if
+        // the edition suffices)
+        Gen:                "gen", // >= 2024 Edition only
         Try:                "try", // >= 2018 Edition only
 
-        // Special lifetime names
+        // "Lifetime keywords": regular keywords with a leading `'`.
+        // Matching predicates: `is_any_keyword`
         UnderscoreLifetime: "'_",
         StaticLifetime:     "'static",
 
         // Weak keywords, have special meaning only in specific contexts.
+        // Matching predicates: `is_any_keyword`
         Auto:               "auto",
         Builtin:            "builtin",
         Catch:              "catch",
         Default:            "default",
-        Gen:                "gen",
         MacroRules:         "macro_rules",
         Raw:                "raw",
         Reuse:              "reuse",
@@ -2589,6 +2604,11 @@ pub mod sym {
 }
 
 impl Symbol {
+    /// Don't use this unless you're doing something very loose and heuristic-y.
+    pub fn is_any_keyword(self) -> bool {
+        self >= kw::As && self <= kw::Yeet
+    }
+
     fn is_special(self) -> bool {
         self <= kw::Underscore
     }
@@ -2606,8 +2626,8 @@ impl Symbol {
     }
 
     fn is_unused_keyword_conditional(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
-        self == kw::Try && edition().at_least_rust_2018()
-            || self == kw::Gen && edition().at_least_rust_2024()
+        self == kw::Gen && edition().at_least_rust_2024()
+            || self == kw::Try && edition().at_least_rust_2018()
     }
 
     pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
@@ -2645,6 +2665,11 @@ impl Symbol {
 }
 
 impl Ident {
+    /// Don't use this unless you're doing something very loose and heuristic-y.
+    pub fn is_any_keyword(self) -> bool {
+        self.name.is_any_keyword()
+    }
+
     /// Returns `true` for reserved identifiers used internally for elided lifetimes,
     /// unnamed method parameters, crate root module, error recovery etc.
     pub fn is_special(self) -> bool {
@@ -2683,41 +2708,19 @@ impl Ident {
     }
 }
 
-/// An iterator over all the keywords in Rust.
-#[derive(Copy, Clone)]
-pub struct AllKeywords {
-    curr_idx: u32,
-    end_idx: u32,
-}
-
-impl AllKeywords {
-    /// Initialize a new iterator over all the keywords.
-    ///
-    /// *Note:* Please update this if a new keyword is added beyond the current
-    /// range.
-    pub fn new() -> Self {
-        AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
-    }
-
-    /// Collect all the keywords in a given edition into a vector.
-    pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
-        self.filter(|&keyword| {
-            keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
+/// Collect all the keywords in a given edition into a vector.
+///
+/// *Note:* Please update this if a new keyword is added beyond the current
+/// range.
+pub fn used_keywords(edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
+    (kw::Empty.as_u32()..kw::Yeet.as_u32())
+        .filter_map(|kw| {
+            let kw = Symbol::new(kw);
+            if kw.is_used_keyword_always() || kw.is_used_keyword_conditional(edition) {
+                Some(kw)
+            } else {
+                None
+            }
         })
         .collect()
-    }
-}
-
-impl Iterator for AllKeywords {
-    type Item = Symbol;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.curr_idx <= self.end_idx {
-            let keyword = Symbol::new(self.curr_idx);
-            self.curr_idx += 1;
-            Some(keyword)
-        } else {
-            None
-        }
-    }
 }