about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com> 2022-11-08 11:23:51 +0530
committer: GitHub <noreply@github.com> 2022-11-08 11:23:51 +0530
commit: 4946ee7c8fd9a6b5b7e506373950cac57a4e8015 (patch)
tree: 7fed610bfab0aebbd6df7c16f391be76c9fbb502
parent: b695ed3f2032d349a5cb9f26a8df67936943c075 (diff)
parent: f5e390e8631a759579674b0899087a51bb073dd3 (diff)
download: rust-4946ee7c8fd9a6b5b7e506373950cac57a4e8015.tar.gz
download: rust-4946ee7c8fd9a6b5b7e506373950cac57a4e8015.zip
Rollup merge of #103651 - Alexendoo:parse-format-unicode-escapes, r=wesleywiser
Fix `rustc_parse_format` spans following escaped utf-8 multibyte chars

Currently, too many skips are created for char escapes that encode to more than 1 byte in UTF-8, so the spans that follow such an escape are shifted too far to the right ([playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=c77a9dc669b69b167271b59ed2c8d88c)):

```rust
fn main() {
    format!("\u{df}{a}");
    format!("\u{211d}{a}");
    format!("\u{1f4a3}{a}");
}
```
```
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:2:22
  |
2 |     format!("\u{df}{a}");
  |                      ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:3:25
  |
3 |     format!("\u{211d}{a}");
  |                         ^ not found in this scope

error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:4:27
  |
4 |     format!("\u{1f4a3}{a}");
  |                           ^ not found in this scope
```

This change reduces the number of skips generated for such escapes, so that spans following an escaped multibyte char point at the correct column.

Fixes https://github.com/rust-lang/rust-clippy/issues/9727
-rw-r--r-- compiler/rustc_parse_format/src/lib.rs 47
-rw-r--r-- src/test/ui/fmt/unicode-escape-spans.rs 19
-rw-r--r-- src/test/ui/fmt/unicode-escape-spans.stderr 63
3 files changed, 111 insertions, 18 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs
index 1394993abad..54bf4d1d6b7 100644
--- a/compiler/rustc_parse_format/src/lib.rs
+++ b/compiler/rustc_parse_format/src/lib.rs
@@ -819,19 +819,19 @@ fn find_skips_from_snippet(
     };
 
     fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> {
-        let mut s = snippet.char_indices().peekable();
+        let mut s = snippet.char_indices();
         let mut skips = vec![];
         while let Some((pos, c)) = s.next() {
-            match (c, s.peek()) {
+            match (c, s.clone().next()) {
                 // skip whitespace and empty lines ending in '\\'
                 ('\\', Some((next_pos, '\n'))) if !is_raw => {
                     skips.push(pos);
-                    skips.push(*next_pos);
+                    skips.push(next_pos);
                     let _ = s.next();
 
-                    while let Some((pos, c)) = s.peek() {
+                    while let Some((pos, c)) = s.clone().next() {
                         if matches!(c, ' ' | '\n' | '\t') {
-                            skips.push(*pos);
+                            skips.push(pos);
                             let _ = s.next();
                         } else {
                             break;
@@ -839,7 +839,7 @@ fn find_skips_from_snippet(
                     }
                 }
                 ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
-                    skips.push(*next_pos);
+                    skips.push(next_pos);
                     let _ = s.next();
                 }
                 ('\\', Some((_, 'x'))) if !is_raw => {
@@ -858,19 +858,30 @@ fn find_skips_from_snippet(
                     }
                     if let Some((next_pos, next_c)) = s.next() {
                         if next_c == '{' {
-                            skips.push(next_pos);
-                            let mut i = 0; // consume up to 6 hexanumeric chars + closing `}`
-                            while let (Some((next_pos, c)), true) = (s.next(), i < 7) {
-                                if c.is_digit(16) {
-                                    skips.push(next_pos);
-                                } else if c == '}' {
-                                    skips.push(next_pos);
-                                    break;
-                                } else {
-                                    break;
-                                }
-                                i += 1;
+                            // consume up to 6 hexanumeric chars
+                            let digits_len =
+                                s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
+
+                            let len_utf8 = s
+                                .as_str()
+                                .get(..digits_len)
+                                .and_then(|digits| u32::from_str_radix(digits, 16).ok())
+                                .and_then(char::from_u32)
+                                .map_or(1, char::len_utf8);
+
+                            // Skip the digits, for chars that encode to more than 1 utf-8 byte
+                            // exclude as many digits as it is greater than 1 byte
+                            //
+                            // So for a 3 byte character, exclude 2 digits
+                            let required_skips =
+                                digits_len.saturating_sub(len_utf8.saturating_sub(1));
+
+                            // skip '{' and '}' also
+                            for pos in (next_pos..).take(required_skips + 2) {
+                                skips.push(pos)
                             }
+
+                            s.nth(digits_len);
                         } else if next_c.is_digit(16) {
                             skips.push(next_pos);
                             // We suggest adding `{` and `}` when appropriate, accept it here as if
diff --git a/src/test/ui/fmt/unicode-escape-spans.rs b/src/test/ui/fmt/unicode-escape-spans.rs
new file mode 100644
index 00000000000..753d91ce58e
--- /dev/null
+++ b/src/test/ui/fmt/unicode-escape-spans.rs
@@ -0,0 +1,19 @@
+fn main() {
+    // 1 byte in UTF-8
+    format!("\u{000041}{a}"); //~ ERROR cannot find value
+    format!("\u{0041}{a}"); //~ ERROR cannot find value
+    format!("\u{41}{a}"); //~ ERROR cannot find value
+    format!("\u{0}{a}"); //~ ERROR cannot find value
+
+    // 2 bytes
+    format!("\u{0df}{a}"); //~ ERROR cannot find value
+    format!("\u{df}{a}"); //~ ERROR cannot find value
+
+    // 3 bytes
+    format!("\u{00211d}{a}"); //~ ERROR cannot find value
+    format!("\u{211d}{a}"); //~ ERROR cannot find value
+
+    // 4 bytes
+    format!("\u{1f4a3}{a}"); //~ ERROR cannot find value
+    format!("\u{10ffff}{a}"); //~ ERROR cannot find value
+}
diff --git a/src/test/ui/fmt/unicode-escape-spans.stderr b/src/test/ui/fmt/unicode-escape-spans.stderr
new file mode 100644
index 00000000000..1d8473f01b8
--- /dev/null
+++ b/src/test/ui/fmt/unicode-escape-spans.stderr
@@ -0,0 +1,63 @@
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:3:25
+   |
+LL |     format!("\u{000041}{a}");
+   |                         ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:4:23
+   |
+LL |     format!("\u{0041}{a}");
+   |                       ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:5:21
+   |
+LL |     format!("\u{41}{a}");
+   |                     ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:6:20
+   |
+LL |     format!("\u{0}{a}");
+   |                    ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:9:22
+   |
+LL |     format!("\u{0df}{a}");
+   |                      ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:10:21
+   |
+LL |     format!("\u{df}{a}");
+   |                     ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:13:25
+   |
+LL |     format!("\u{00211d}{a}");
+   |                         ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:14:23
+   |
+LL |     format!("\u{211d}{a}");
+   |                       ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:17:24
+   |
+LL |     format!("\u{1f4a3}{a}");
+   |                        ^ not found in this scope
+
+error[E0425]: cannot find value `a` in this scope
+  --> $DIR/unicode-escape-spans.rs:18:25
+   |
+LL |     format!("\u{10ffff}{a}");
+   |                         ^ not found in this scope
+
+error: aborting due to 10 previous errors
+
+For more information about this error, try `rustc --explain E0425`.