about summary refs log tree commit diff
path: root/compiler/rustc_parse/src/lexer/mod.rs
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-01-14 14:50:53 +0000
committerbors <bors@rust-lang.org>2023-01-14 14:50:53 +0000
commit4b51adf6ffa1ae2286ea436eb48cbaa8771886e6 (patch)
tree4228def3b668261be93c3f176e32ab7deb4431da /compiler/rustc_parse/src/lexer/mod.rs
parent44a500c8c187b245638684748f54bd6ec67e0b25 (diff)
parent4173c7cc0f128d30269224351af2bc655905700f (diff)
downloadrust-4b51adf6ffa1ae2286ea436eb48cbaa8771886e6.tar.gz
rust-4b51adf6ffa1ae2286ea436eb48cbaa8771886e6.zip
Auto merge of #106851 - matthiaskrgr:rollup-d9dz3yp, r=matthiaskrgr
Rollup of 10 pull requests

Successful merges:

 - #106046 (Fix mir-opt tests for big-endian platforms)
 - #106470 (tidy: Don't include wasm32 in compiler dependency check)
 - #106566 (Emit a single error for contiguous sequences of unknown tokens)
 - #106644 (Update the wasi-libc used for the wasm32-wasi target)
 - #106665 (Add note when `FnPtr` vs. `FnDef` impl trait)
 - #106752 (Emit a hint for bad call return types due to generic arguments)
 - #106788 (Tweak E0599 and elaborate_predicates)
 - #106831 (Use GitHub yaml templates for ICE, Docs and Diagnostics tickets)
 - #106846 (Improve some comments and names in parser)
 - #106848 (Fix wrong path in triage bot autolabel for wg-trait-solver-refactor)

Failed merges:

r? `@ghost`
`@rustbot` modify labels: rollup
Diffstat (limited to 'compiler/rustc_parse/src/lexer/mod.rs')
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs28
1 files changed, 24 insertions, 4 deletions
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index f027843e6b4..8761c23625b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -79,7 +79,7 @@ impl<'a> StringReader<'a> {
     /// preceded by whitespace.
     fn next_token(&mut self) -> (Token, bool) {
         let mut preceded_by_whitespace = false;
-
+        let mut swallow_next_invalid = 0;
         // Skip trivial (whitespace & comments) tokens
         loop {
             let token = self.cursor.advance_token();
@@ -232,19 +232,34 @@ impl<'a> StringReader<'a> {
                 rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
 
                 rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
-                    let c = self.str_from(start).chars().next().unwrap();
+                    // Don't emit diagnostics for sequences of the same invalid token
+                    if swallow_next_invalid > 0 {
+                        swallow_next_invalid -= 1;
+                        continue;
+                    }
+                    let mut it = self.str_from_to_end(start).chars();
+                    let c = it.next().unwrap();
+                    let repeats = it.take_while(|c1| *c1 == c).count();
                     let mut err =
-                        self.struct_err_span_char(start, self.pos, "unknown start of token", c);
+                        self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
                     // FIXME: the lexer could be used to turn the ASCII version of unicode
                     // homoglyphs, instead of keeping a table in `check_for_substitution`into the
                     // token. Ideally, this should be inside `rustc_lexer`. However, we should
                     // first remove compound tokens like `<<` from `rustc_lexer`, and then add
                     // fancier error recovery to it, as there will be less overall work to do this
                     // way.
-                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
+                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err, repeats+1);
                     if c == '\x00' {
                         err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
                     }
+                    if repeats > 0 {
+                        if repeats == 1 {
+                            err.note(format!("character appears once more"));
+                        } else {
+                            err.note(format!("character appears {repeats} more times"));
+                        }
+                        swallow_next_invalid = repeats;
+                    }
                     err.emit();
                     if let Some(token) = token {
                         token
@@ -486,6 +501,11 @@ impl<'a> StringReader<'a> {
         &self.src[self.src_index(start)..self.src_index(end)]
     }
 
+    /// Slice of the source text spanning from `start` until the end
+    fn str_from_to_end(&self, start: BytePos) -> &str {
+        &self.src[self.src_index(start)..]
+    }
+
     fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
         match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
             Err(RawStrError::InvalidStarter { bad_char }) => {