diff options
| author | Ralf Jung <post@ralfj.de> | 2023-01-15 16:46:17 -0500 |
|---|---|---|
| committer | Ralf Jung <post@ralfj.de> | 2023-01-15 16:46:17 -0500 |
| commit | b23795229781a2125fade078fa610bc47cc75b16 (patch) | |
| tree | c57813bd1e93312f265ead2df13e5a9f885b17e9 /compiler/rustc_parse/src | |
| parent | ec5f8253e28a660034a3936a171f93a3c063a0cc (diff) | |
| parent | fa2ff4d7e5a88bfda186f0f3f621402470745788 (diff) | |
| download | rust-b23795229781a2125fade078fa610bc47cc75b16.tar.gz rust-b23795229781a2125fade078fa610bc47cc75b16.zip | |
Merge from rustc
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 28 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 10 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 35 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/stmt.rs | 10 |
5 files changed, 60 insertions, 25 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f027843e6b4..8761c23625b 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -79,7 +79,7 @@ impl<'a> StringReader<'a> { /// preceded by whitespace. fn next_token(&mut self) -> (Token, bool) { let mut preceded_by_whitespace = false; - + let mut swallow_next_invalid = 0; // Skip trivial (whitespace & comments) tokens loop { let token = self.cursor.advance_token(); @@ -232,19 +232,34 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => { - let c = self.str_from(start).chars().next().unwrap(); + // Don't emit diagnostics for sequences of the same invalid token + if swallow_next_invalid > 0 { + swallow_next_invalid -= 1; + continue; + } + let mut it = self.str_from_to_end(start).chars(); + let c = it.next().unwrap(); + let repeats = it.take_while(|c1| *c1 == c).count(); let mut err = - self.struct_err_span_char(start, self.pos, "unknown start of token", c); + self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c); // FIXME: the lexer could be used to turn the ASCII version of unicode // homoglyphs, instead of keeping a table in `check_for_substitution`into the // token. Ideally, this should be inside `rustc_lexer`. However, we should // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let token = unicode_chars::check_for_substitution(self, start, c, &mut err); + let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1); if c == '\x00' { err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used"); } + if repeats > 0 { + if repeats == 1 { + err.note(format!("character appears once more")); + } else { + err.note(format!("character appears {repeats} more times")); + } + swallow_next_invalid = repeats; + } err.emit(); if let Some(token) = token { token @@ -486,6 +501,11 @@ impl<'a> StringReader<'a> { &self.src[self.src_index(start)..self.src_index(end)] } + /// Slice of the source text spanning from `start` until the end + fn str_from_to_end(&self, start: BytePos) -> &str { + &self.src[self.src_index(start)..] + } + fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! { match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) { Err(RawStrError::InvalidStarter { bad_char }) => { diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs index f1b50296e25..65479b341d7 100644 --- a/compiler/rustc_parse/src/lexer/unicode_chars.rs +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -337,10 +337,11 @@ pub(super) fn check_for_substitution<'a>( pos: BytePos, ch: char, err: &mut Diagnostic, + count: usize, ) -> Option<token::TokenKind> { let &(_u_char, u_name, ascii_char) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?; - let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8())); + let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8() * count)); let Some((_ascii_char, ascii_name, token)) = ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) else { let msg = format!("substitution character not found for '{}'", ch); @@ -369,7 +370,12 @@ pub(super) fn check_for_substitution<'a>( "Unicode character '{}' ({}) looks like '{}' ({}), but it is not", ch, u_name, ascii_char, ascii_name ); - err.span_suggestion(span, &msg, ascii_char, Applicability::MaybeIncorrect); + err.span_suggestion( + span, + &msg, + ascii_char.to_string().repeat(count), + Applicability::MaybeIncorrect, + ); } token.clone() } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index dd2b03988c3..d58afcd4c9f 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -83,7 +83,7 @@ macro_rules! maybe_whole_expr { pub(super) enum LhsExpr { NotYetParsed, AttributesParsed(AttrWrapper), - AlreadyParsed(P<Expr>, bool), // (expr, starts_statement) + AlreadyParsed { expr: P<Expr>, starts_statement: bool }, } impl From<Option<AttrWrapper>> for LhsExpr { @@ -97,11 +97,11 @@ impl From<Option<AttrWrapper>> for LhsExpr { } impl From<P<Expr>> for LhsExpr { - /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed(expr)`. + /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed { expr, starts_statement: false }`. /// /// This conversion does not allocate. fn from(expr: P<Expr>) -> Self { - LhsExpr::AlreadyParsed(expr, false) + LhsExpr::AlreadyParsed { expr, starts_statement: false } } } @@ -174,7 +174,7 @@ impl<'a> Parser<'a> { lhs: LhsExpr, ) -> PResult<'a, P<Expr>> { let mut starts_stmt = false; - let mut lhs = if let LhsExpr::AlreadyParsed(expr, starts_statement) = lhs { + let mut lhs = if let LhsExpr::AlreadyParsed { expr, starts_statement } = lhs { starts_stmt = starts_statement; expr } else { @@ -562,17 +562,23 @@ impl<'a> Parser<'a> { // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() match this.token.uninterpolate().kind { - token::Not => make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Not)), // `!expr` - token::Tilde => make_it!(this, attrs, |this, _| this.recover_tilde_expr(lo)), // `~expr` + // `!expr` + token::Not => make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Not)), + // `~expr` + token::Tilde => make_it!(this, attrs, |this, _| this.recover_tilde_expr(lo)), + // `-expr` token::BinOp(token::Minus) => { make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Neg)) - } // `-expr` + } + // `*expr` token::BinOp(token::Star) => { make_it!(this, attrs, |this, _| this.parse_unary_expr(lo, UnOp::Deref)) - } // `*expr` + } + // `&expr` and `&&expr` token::BinOp(token::And) | token::AndAnd => { make_it!(this, attrs, |this, _| this.parse_borrow_expr(lo)) } + // `+lit` token::BinOp(token::Plus) if this.look_ahead(1, |tok| tok.is_numeric_lit()) => { let mut err = LeadingPlusNotSupported { span: lo, remove_plus: None, add_parentheses: None }; @@ -587,7 +593,7 @@ impl<'a> Parser<'a> { this.bump(); this.parse_prefix_expr(None) - } // `+expr` + } // Recover from `++x`: token::BinOp(token::Plus) if this.look_ahead(1, |t| *t == token::BinOp(token::Plus)) => @@ -624,7 +630,7 @@ impl<'a> Parser<'a> { Ok((span, self.mk_unary(op, expr))) } - // Recover on `!` suggesting for bitwise negation instead. + /// Recover on `~expr` in favor of `!expr`. fn recover_tilde_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { self.sess.emit_err(TildeAsUnaryOperator(lo)); @@ -651,7 +657,6 @@ impl<'a> Parser<'a> { /// Recover on `not expr` in favor of `!expr`. fn recover_not_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> { - // Emit the error... let negated_token = self.look_ahead(1, |t| t.clone()); let sub_diag = if negated_token.is_numeric_lit() { @@ -672,7 +677,6 @@ impl<'a> Parser<'a> { ), }); - // ...and recover! self.parse_unary_expr(lo, UnOp::Not) } @@ -1471,9 +1475,8 @@ impl<'a> Parser<'a> { } else if self.eat(&token::Comma) { // Vector with two or more elements. let sep = SeqSep::trailing_allowed(token::Comma); - let (remaining_exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?; - let mut exprs = vec![first_expr]; - exprs.extend(remaining_exprs); + let (mut exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?; + exprs.insert(0, first_expr); ExprKind::Array(exprs) } else { // Vector with one element @@ -1593,7 +1596,7 @@ impl<'a> Parser<'a> { vis.0 }; - // Suggestion involves adding a (as of time of writing this, unstable) labeled block. + // Suggestion involves adding a labeled block. // // If there are no breaks that may use this label, suggest removing the label and // recover to the unmodified expression. diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index 0b057f2f577..e73a17ced7d 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -469,7 +469,7 @@ impl<'a> Parser<'a> { /// Try to recover the more general form `intersect ::= $pat_lhs @ $pat_rhs`. /// /// Allowed binding patterns generated by `binding ::= ref? mut? $ident @ $pat_rhs` - /// should already have been parsed by now at this point, + /// should already have been parsed by now at this point, /// if the next token is `@` then we can try to parse the more general form. /// /// Consult `parse_pat_ident` for the `binding` grammar. diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 1e5c2834960..4ff9927aab5 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -164,7 +164,10 @@ impl<'a> Parser<'a> { // Perform this outside of the `collect_tokens_trailing_token` closure, // since our outer attributes do not apply to this part of the expression let expr = self.with_res(Restrictions::STMT_EXPR, |this| { - this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr, true)) + this.parse_assoc_expr_with( + 0, + LhsExpr::AlreadyParsed { expr, starts_statement: true }, + ) })?; Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) } else { @@ -198,7 +201,10 @@ impl<'a> Parser<'a> { let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac)); let e = self.maybe_recover_from_bad_qpath(e)?; let e = self.parse_dot_or_call_expr_with(e, lo, attrs)?; - let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e, false))?; + let e = self.parse_assoc_expr_with( + 0, + LhsExpr::AlreadyParsed { expr: e, starts_statement: false }, + )?; StmtKind::Expr(e) }; Ok(self.mk_stmt(lo.to(hi), kind)) |
