summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
authorMatthias Krüger <476013+matthiaskrgr@users.noreply.github.com>2025-07-23 15:59:30 +0200
committerGitHub <noreply@github.com>2025-07-23 15:59:30 +0200
commit80342618e1f7dfb32b5d35d6fcb204b302e70cdf (patch)
tree789a52686fcb26d30625fe588e08cb75ea88df3e /compiler
parent546929f276600f9bc794af9d4c07eac14ef6c74a (diff)
parent2832517ba174906b6d6ebedf4893dca8d8a98f55 (diff)
downloadrust-80342618e1f7dfb32b5d35d6fcb204b302e70cdf.tar.gz
rust-80342618e1f7dfb32b5d35d6fcb204b302e70cdf.zip
Rollup merge of #144239 - xizheyin:clean-lexer, r=fee1-dead
Clean `rustc/parse/src/lexer` to improve maintainability

This PR refactors the lexer code to improve maintainability and eliminate code duplication.
In the first commit, I improve the error handling:
- rename `make_unclosed_delims_error` to more appropriate `make_mismatched_closing_delims_errors`
- changes return type from Option<Diag> to `Vec<Diag>` to avoid lengthy vec processing at `lex_token_trees`
- use `splice` instead of `extend` to make the logic clearer, since `errs` sounds more generic and better suited as a return value

In the second commit, I replace the magic number 5 with UNCLOSED_DELIMITER_SHOW_LIMIT constant.

In the third commit, I moves `eof_err` function below parsing logic for better code flow.

In the forth one, I extract `calculate_spacing` function to eliminate duplicate spacing logic between `bump` and `bump_minimal` functions.

r? compiler
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_parse/src/lexer/diagnostics.rs44
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs18
-rw-r--r--compiler/rustc_parse/src/lexer/tokentrees.rs106
3 files changed, 83 insertions, 85 deletions
diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs
index 6de001fc998..947f3df179f 100644
--- a/compiler/rustc_parse/src/lexer/diagnostics.rs
+++ b/compiler/rustc_parse/src/lexer/diagnostics.rs
@@ -126,23 +126,29 @@ pub(super) fn report_suspicious_mismatch_block(
     }
 }
 
-pub(crate) fn make_unclosed_delims_error(
-    unmatched: UnmatchedDelim,
-    psess: &ParseSess,
-) -> Option<Diag<'_>> {
-    // `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
-    // `unmatched_delims` only for error recovery in the `Parser`.
-    let found_delim = unmatched.found_delim?;
-    let mut spans = vec![unmatched.found_span];
-    if let Some(sp) = unmatched.unclosed_span {
-        spans.push(sp);
-    };
-    let err = psess.dcx().create_err(MismatchedClosingDelimiter {
-        spans,
-        delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind()).to_string(),
-        unmatched: unmatched.found_span,
-        opening_candidate: unmatched.candidate_span,
-        unclosed: unmatched.unclosed_span,
-    });
-    Some(err)
+pub(crate) fn make_errors_for_mismatched_closing_delims<'psess>(
+    unmatcheds: &[UnmatchedDelim],
+    psess: &'psess ParseSess,
+) -> Vec<Diag<'psess>> {
+    unmatcheds
+        .iter()
+        .filter_map(|unmatched| {
+            // `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to
+            // `unmatched_delims` only for error recovery in the `Parser`.
+            let found_delim = unmatched.found_delim?;
+            let mut spans = vec![unmatched.found_span];
+            if let Some(sp) = unmatched.unclosed_span {
+                spans.push(sp);
+            };
+            let err = psess.dcx().create_err(MismatchedClosingDelimiter {
+                spans,
+                delimiter: pprust::token_kind_to_string(&found_delim.as_close_token_kind())
+                    .to_string(),
+                unmatched: unmatched.found_span,
+                opening_candidate: unmatched.candidate_span,
+                unclosed: unmatched.unclosed_span,
+            });
+            Some(err)
+        })
+        .collect()
 }
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 60d275bf2b4..85af5a615ae 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,4 +1,4 @@
-use diagnostics::make_unclosed_delims_error;
+use diagnostics::make_errors_for_mismatched_closing_delims;
 use rustc_ast::ast::{self, AttrStyle};
 use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
@@ -71,27 +71,23 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
     };
     let res = lexer.lex_token_trees(/* is_delimited */ false);
 
-    let mut unmatched_delims: Vec<_> = lexer
-        .diag_info
-        .unmatched_delims
-        .into_iter()
-        .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
-        .collect();
+    let mut unmatched_closing_delims: Vec<_> =
+        make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);
 
     match res {
         Ok((_open_spacing, stream)) => {
-            if unmatched_delims.is_empty() {
+            if unmatched_closing_delims.is_empty() {
                 Ok(stream)
             } else {
                 // Return error if there are unmatched delimiters or unclosed delimiters.
-                Err(unmatched_delims)
+                Err(unmatched_closing_delims)
             }
         }
         Err(errs) => {
             // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
             // because the delimiter mismatch is more likely to be the root cause of error
-            unmatched_delims.extend(errs);
-            Err(unmatched_delims)
+            unmatched_closing_delims.extend(errs);
+            Err(unmatched_closing_delims)
         }
     }
 }
diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs
index 64748199f28..634f4c30b26 100644
--- a/compiler/rustc_parse/src/lexer/tokentrees.rs
+++ b/compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -51,45 +51,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
         }
     }
 
-    fn eof_err(&mut self) -> Diag<'psess> {
-        let msg = "this file contains an unclosed delimiter";
-        let mut err = self.dcx().struct_span_err(self.token.span, msg);
-
-        let unclosed_delimiter_show_limit = 5;
-        let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_delimiters.len());
-        for &(_, span) in &self.diag_info.open_delimiters[..len] {
-            err.span_label(span, "unclosed delimiter");
-            self.diag_info.unmatched_delims.push(UnmatchedDelim {
-                found_delim: None,
-                found_span: self.token.span,
-                unclosed_span: Some(span),
-                candidate_span: None,
-            });
-        }
-
-        if let Some((_, span)) = self.diag_info.open_delimiters.get(unclosed_delimiter_show_limit)
-            && self.diag_info.open_delimiters.len() >= unclosed_delimiter_show_limit + 2
-        {
-            err.span_label(
-                *span,
-                format!(
-                    "another {} unclosed delimiters begin from here",
-                    self.diag_info.open_delimiters.len() - unclosed_delimiter_show_limit
-                ),
-            );
-        }
-
-        if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
-            report_suspicious_mismatch_block(
-                &mut err,
-                &self.diag_info,
-                self.psess.source_map(),
-                *delim,
-            )
-        }
-        err
-    }
-
     fn lex_token_tree_open_delim(
         &mut self,
         open_delim: Delimiter,
@@ -206,13 +167,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
             } else if let Some(glued) = self.token.glue(&next_tok) {
                 self.token = glued;
             } else {
-                let this_spacing = if next_tok.is_punct() {
-                    Spacing::Joint
-                } else if next_tok == token::Eof {
-                    Spacing::Alone
-                } else {
-                    Spacing::JointHidden
-                };
+                let this_spacing = self.calculate_spacing(&next_tok);
                 break (this_spacing, next_tok);
             }
         };
@@ -223,23 +178,64 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
     // Cut-down version of `bump` used when the token kind is known in advance.
     fn bump_minimal(&mut self) -> Spacing {
         let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
-
         let this_spacing = if is_next_tok_preceded_by_whitespace {
             Spacing::Alone
         } else {
-            if next_tok.is_punct() {
-                Spacing::Joint
-            } else if next_tok == token::Eof {
-                Spacing::Alone
-            } else {
-                Spacing::JointHidden
-            }
+            self.calculate_spacing(&next_tok)
         };
-
         self.token = next_tok;
         this_spacing
     }
 
+    fn calculate_spacing(&self, next_tok: &Token) -> Spacing {
+        if next_tok.is_punct() {
+            Spacing::Joint
+        } else if *next_tok == token::Eof {
+            Spacing::Alone
+        } else {
+            Spacing::JointHidden
+        }
+    }
+
+    fn eof_err(&mut self) -> Diag<'psess> {
+        const UNCLOSED_DELIMITER_SHOW_LIMIT: usize = 5;
+        let msg = "this file contains an unclosed delimiter";
+        let mut err = self.dcx().struct_span_err(self.token.span, msg);
+
+        let len = usize::min(UNCLOSED_DELIMITER_SHOW_LIMIT, self.diag_info.open_delimiters.len());
+        for &(_, span) in &self.diag_info.open_delimiters[..len] {
+            err.span_label(span, "unclosed delimiter");
+            self.diag_info.unmatched_delims.push(UnmatchedDelim {
+                found_delim: None,
+                found_span: self.token.span,
+                unclosed_span: Some(span),
+                candidate_span: None,
+            });
+        }
+
+        if let Some((_, span)) = self.diag_info.open_delimiters.get(UNCLOSED_DELIMITER_SHOW_LIMIT)
+            && self.diag_info.open_delimiters.len() >= UNCLOSED_DELIMITER_SHOW_LIMIT + 2
+        {
+            err.span_label(
+                *span,
+                format!(
+                    "another {} unclosed delimiters begin from here",
+                    self.diag_info.open_delimiters.len() - UNCLOSED_DELIMITER_SHOW_LIMIT
+                ),
+            );
+        }
+
+        if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
+            report_suspicious_mismatch_block(
+                &mut err,
+                &self.diag_info,
+                self.psess.source_map(),
+                *delim,
+            )
+        }
+        err
+    }
+
     fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
         // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
         let token_str = token_to_string(&self.token);