diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2024-12-14 05:01:06 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-14 05:01:06 +0100 |
| commit | ac6ac81a67600e949747310c45b79c8fb989b7e4 (patch) | |
| tree | 0203babf7d95e5bfa4608789192c16f9b288ff55 | |
| parent | 03e328d1786e023e7160bb250395b51bd6b58e9e (diff) | |
| parent | 2e412fef75ff2f45186863a76fdd7c5a9ec1f018 (diff) | |
| download | rust-ac6ac81a67600e949747310c45b79c8fb989b7e4.tar.gz rust-ac6ac81a67600e949747310c45b79c8fb989b7e4.zip | |
Rollup merge of #134192 - nnethercote:rm-Lexer-Parser-dep, r=compiler-errors
Remove `Lexer`'s dependency on `Parser`.
Lexing precedes parsing, as you'd expect: `Lexer` creates a `TokenStream` and `Parser` then parses that `TokenStream`.
But, in a horrendous violation of layering abstractions and common sense, `Lexer` depends on `Parser`! The `Lexer::unclosed_delim_err` method does some error recovery that relies on creating a `Parser` to do some post-processing of the `TokenStream` that the `Lexer` just created.
This commit just removes `unclosed_delim_err`. This change removes `Lexer`'s dependency on `Parser`, and also means that `lex_token_tree`'s return value can have a more typical form.
The cost is slightly worse error messages in two obscure cases, as shown in these tests:
- tests/ui/parser/brace-in-let-chain.rs: there is slightly less explanation in this case involving an extra `{`.
- tests/ui/parser/diff-markers/unclosed-delims{,-in-macro}.rs: the diff marker detection is no longer supported (because that detection is implemented in the parser).
In my opinion this cost is outweighed by the magnitude of the code cleanup.
r? ```````@chenyukang```````
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 40 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 94 | ||||
| -rw-r--r-- | tests/ui/parser/brace-in-let-chain.rs | 4 | ||||
| -rw-r--r-- | tests/ui/parser/brace-in-let-chain.stderr | 30 | ||||
| -rw-r--r-- | tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs | 6 | ||||
| -rw-r--r-- | tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr | 27 | ||||
| -rw-r--r-- | tests/ui/parser/diff-markers/unclosed-delims.rs | 14 | ||||
| -rw-r--r-- | tests/ui/parser/diff-markers/unclosed-delims.stderr | 27 |
8 files changed, 68 insertions, 174 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 2426eb81678..443ddfc94ec 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>( token: Token::dummy(), diag_info: TokenTreeDiagInfo::default(), }; - let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false); - let unmatched_delims = lexer.diag_info.unmatched_delims; - - if res.is_ok() && unmatched_delims.is_empty() { - Ok(stream) - } else { - // Return error if there are unmatched delimiters or unclosed delimiters. - // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch - // because the delimiter mismatch is more likely to be the root cause of error - let mut buffer: Vec<_> = unmatched_delims - .into_iter() - .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess)) - .collect(); - if let Err(errs) = res { - // Add unclosing delimiter or diff marker errors - buffer.extend(errs); + let res = lexer.lex_token_trees(/* is_delimited */ false); + + let mut unmatched_delims: Vec<_> = lexer + .diag_info + .unmatched_delims + .into_iter() + .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess)) + .collect(); + + match res { + Ok((_open_spacing, stream)) => { + if unmatched_delims.is_empty() { + Ok(stream) + } else { + // Return error if there are unmatched delimiters or unclosed delimiters. + Err(unmatched_delims) + } + } + Err(errs) => { + // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch + // because the delimiter mismatch is more likely to be the root cause of error + unmatched_delims.extend(errs); + Err(unmatched_delims) } - Err(buffer) } } diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index ee38f16d4ec..b3f83a32024 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -1,12 +1,10 @@ use rustc_ast::token::{self, Delimiter, Token}; use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree}; use rustc_ast_pretty::pprust::token_to_string; -use rustc_errors::{Applicability, Diag}; -use rustc_span::symbol::kw; +use rustc_errors::Diag; use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level}; use super::{Lexer, UnmatchedDelim}; -use crate::Parser; impl<'psess, 'src> Lexer<'psess, 'src> { // Lex into a token stream. The `Spacing` in the result is that of the @@ -14,7 +12,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { pub(super) fn lex_token_trees( &mut self, is_delimited: bool, - ) -> (Spacing, TokenStream, Result<(), Vec<Diag<'psess>>>) { + ) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> { // Move past the opening delimiter. let open_spacing = self.bump_minimal(); @@ -27,25 +25,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> { debug_assert!(!matches!(delim, Delimiter::Invisible(_))); buf.push(match self.lex_token_tree_open_delim(delim) { Ok(val) => val, - Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), + Err(errs) => return Err(errs), }) } token::CloseDelim(delim) => { // Invisible delimiters cannot occur here because `TokenTreesReader` parses // code directly from strings, with no macro expansion involved. debug_assert!(!matches!(delim, Delimiter::Invisible(_))); - return ( - open_spacing, - TokenStream::new(buf), - if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) }, - ); + return if is_delimited { + Ok((open_spacing, TokenStream::new(buf))) + } else { + Err(vec![self.close_delim_err(delim)]) + }; } token::Eof => { - return ( - open_spacing, - TokenStream::new(buf), - if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) }, - ); + return if is_delimited { + Err(vec![self.eof_err()]) + } else { + Ok((open_spacing, TokenStream::new(buf))) + }; } _ => { // Get the next normal token. @@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Lex the token trees within the delimiters. // We stop at any delimiter so we can try to recover if the user // uses an incorrect delimiter. - let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true); - if let Err(errs) = res { - return Err(self.unclosed_delim_err(tts, errs)); - } + let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?; // Expand to cover the entire delimited token tree. let delim_span = DelimSpan::from_pair(pre_span, self.token.span); @@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> { this_spacing } - fn unclosed_delim_err( - &mut self, - tts: TokenStream, - mut errs: Vec<Diag<'psess>>, - ) -> Vec<Diag<'psess>> { - // If there are unclosed delims, see if there are diff markers and if so, point them - // out instead of complaining about the unclosed delims. - let mut parser = Parser::new(self.psess, tts, None); - let mut diff_errs = vec![]; - // Suggest removing a `{` we think appears in an `if`/`while` condition. - // We want to suggest removing a `{` only if we think we're in an `if`/`while` condition, - // but we have no way of tracking this in the lexer itself, so we piggyback on the parser. - let mut in_cond = false; - while parser.token != token::Eof { - if let Err(diff_err) = parser.err_vcs_conflict_marker() { - diff_errs.push(diff_err); - } else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) { - in_cond = true; - } else if matches!( - parser.token.kind, - token::CloseDelim(Delimiter::Brace) | token::FatArrow - ) { - // End of the `if`/`while` body, or the end of a `match` guard. - in_cond = false; - } else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) { - // Store the `&&` and `let` to use their spans later when creating the diagnostic - let maybe_andand = parser.look_ahead(1, |t| t.clone()); - let maybe_let = parser.look_ahead(2, |t| t.clone()); - if maybe_andand == token::OpenDelim(Delimiter::Brace) { - // This might be the beginning of the `if`/`while` body (i.e., the end of the - // condition). - in_cond = false; - } else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) { - let mut err = parser.dcx().struct_span_err( - parser.token.span, - "found a `{` in the middle of a let-chain", - ); - err.span_suggestion( - parser.token.span, - "consider removing this brace to parse the `let` as part of the same chain", - "", - Applicability::MachineApplicable, - ); - err.span_label( - maybe_andand.span.to(maybe_let.span), - "you might have meant to continue the let-chain here", - ); - errs.push(err); - } - } - parser.bump(); - } - if !diff_errs.is_empty() { - for err in errs { - err.cancel(); - } - return diff_errs; - } - errs - } - fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> { // An unexpected closing delimiter (i.e., there is no matching opening delimiter). let token_str = token_to_string(&self.token); diff --git a/tests/ui/parser/brace-in-let-chain.rs b/tests/ui/parser/brace-in-let-chain.rs index 1f34c73a2c3..2009bc88d9e 100644 --- a/tests/ui/parser/brace-in-let-chain.rs +++ b/tests/ui/parser/brace-in-let-chain.rs @@ -3,7 +3,7 @@ #![feature(let_chains)] fn main() { if let () = () - && let () = () { //~ERROR: found a `{` in the middle of a let-chain + && let () = () { && let () = () { } @@ -11,7 +11,7 @@ fn main() { fn quux() { while let () = () - && let () = () { //~ERROR: found a `{` in the middle of a let-chain + && let () = () { && let () = () { } diff --git a/tests/ui/parser/brace-in-let-chain.stderr b/tests/ui/parser/brace-in-let-chain.stderr index 913a34700df..12af95c2786 100644 --- a/tests/ui/parser/brace-in-let-chain.stderr +++ b/tests/ui/parser/brace-in-let-chain.stderr @@ -27,33 +27,5 @@ LL | } LL | } | ^ -error: found a `{` in the middle of a let-chain - --> $DIR/brace-in-let-chain.rs:14:24 - | -LL | && let () = () { - | ^ -LL | && let () = () - | ------ you might have meant to continue the let-chain here - | -help: consider removing this brace to parse the `let` as part of the same chain - | -LL - && let () = () { -LL + && let () = () - | - -error: found a `{` in the middle of a let-chain - --> $DIR/brace-in-let-chain.rs:6:24 - | -LL | && let () = () { - | ^ -LL | && let () = () - | ------ you might have meant to continue the let-chain here - | -help: consider removing this brace to parse the `let` as part of the same chain - | -LL - && let () = () { -LL + && let () = () - | - -error: aborting due to 3 previous errors +error: aborting due to 1 previous error diff --git a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs index da1774acea5..41a7de03d4b 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs +++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs @@ -1,9 +1,11 @@ +// The diff marker detection was removed for this example, because it relied on +// the lexer having a dependency on the parser, which was horrible. + macro_rules! foo { <<<<<<< HEAD - //~^ ERROR encountered diff marker () { ======= () { // >>>>>>> 7a4f13c blah blah blah } -} +} //~ this file contains an unclosed delimiter diff --git a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr index 927821ddfae..b33f2b5d1b8 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr +++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr @@ -1,23 +1,16 @@ -error: encountered diff marker - --> $DIR/unclosed-delims-in-macro.rs:2:1 +error: this file contains an unclosed delimiter + --> $DIR/unclosed-delims-in-macro.rs:11:48 | +LL | macro_rules! foo { + | - unclosed delimiter LL | <<<<<<< HEAD - | ^^^^^^^ between this marker and `=======` is the code that we're merging into +LL | () { + | - this delimiter might not be properly closed... ... -LL | ======= - | ------- between this marker and `>>>>>>>` is the incoming code -LL | () { // -LL | >>>>>>> 7a4f13c blah blah blah - | ^^^^^^^ this marker concludes the conflict region - | - = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts - to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers - = help: if you're having merge conflicts after pulling new code: - the top section is the code you already had and the bottom section is the remote code - if you're in the middle of a rebase: - the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased - = note: for an explanation on these markers from the `git` documentation: - visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts> +LL | } + | - ^ + | | + | ...as it matches this but it has different indentation error: aborting due to 1 previous error diff --git a/tests/ui/parser/diff-markers/unclosed-delims.rs b/tests/ui/parser/diff-markers/unclosed-delims.rs index 7d400c3827b..827c1eebb9d 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims.rs +++ b/tests/ui/parser/diff-markers/unclosed-delims.rs @@ -1,18 +1,12 @@ +// The diff marker detection was removed for this example, because it relied on +// the lexer having a dependency on the parser, which was horrible. + mod tests { #[test] <<<<<<< HEAD -//~^ ERROR encountered diff marker -//~| NOTE between this marker and `=======` - -//~| NOTE conflict markers indicate that -//~| HELP if you're having merge conflicts -//~| NOTE for an explanation on these markers - fn test1() { ======= -//~^ NOTE between this marker and `>>>>>>>` fn test2() { >>>>>>> 7a4f13c blah blah blah -//~^ NOTE this marker concludes the conflict region } -} +} //~ this file contains an unclosed delimiter diff --git a/tests/ui/parser/diff-markers/unclosed-delims.stderr b/tests/ui/parser/diff-markers/unclosed-delims.stderr index 1eab96442b4..b2541aa47ba 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims.stderr +++ b/tests/ui/parser/diff-markers/unclosed-delims.stderr @@ -1,23 +1,16 @@ -error: encountered diff marker - --> $DIR/unclosed-delims.rs:3:1 +error: this file contains an unclosed delimiter + --> $DIR/unclosed-delims.rs:12:48 | -LL | <<<<<<< HEAD - | ^^^^^^^ between this marker and `=======` is the code that we're merging into +LL | mod tests { + | - unclosed delimiter ... -LL | ======= - | ------- between this marker and `>>>>>>>` is the incoming code +LL | fn test1() { + | - this delimiter might not be properly closed... ... -LL | >>>>>>> 7a4f13c blah blah blah - | ^^^^^^^ this marker concludes the conflict region - | - = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts - to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers - = help: if you're having merge conflicts after pulling new code: - the top section is the code you already had and the bottom section is the remote code - if you're in the middle of a rebase: - the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased - = note: for an explanation on these markers from the `git` documentation: - visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts> +LL | } + | - ^ + | | + | ...as it matches this but it has different indentation error: aborting due to 1 previous error |
