diff options
| author | Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com> | 2022-11-08 11:23:51 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-11-08 11:23:51 +0530 |
| commit | 4946ee7c8fd9a6b5b7e506373950cac57a4e8015 (patch) | |
| tree | 7fed610bfab0aebbd6df7c16f391be76c9fbb502 | |
| parent | b695ed3f2032d349a5cb9f26a8df67936943c075 (diff) | |
| parent | f5e390e8631a759579674b0899087a51bb073dd3 (diff) | |
| download | rust-4946ee7c8fd9a6b5b7e506373950cac57a4e8015.tar.gz rust-4946ee7c8fd9a6b5b7e506373950cac57a4e8015.zip | |
Rollup merge of #103651 - Alexendoo:parse-format-unicode-escapes, r=wesleywiser
Fix `rustc_parse_format` spans following escaped utf-8 multibyte chars
Currently, too many skips are created for char escapes that encode to more than 1 byte in UTF-8 ([playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=c77a9dc669b69b167271b59ed2c8d88c)):
```rust
fn main() { // minimal reproducer: `a` is deliberately undefined so each line emits a spanned E0425
format!("\u{df}{a}"); // U+00DF encodes to 2 bytes in UTF-8
format!("\u{211d}{a}"); // U+211D encodes to 3 bytes in UTF-8
format!("\u{1f4a3}{a}"); // U+1F4A3 encodes to 4 bytes in UTF-8
}
```
```
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:2:22
  |
2 |     format!("\u{df}{a}");
  |                      ^ not found in this scope
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:3:25
  |
3 |     format!("\u{211d}{a}");
  |                         ^ not found in this scope
error[[E0425]](https://doc.rust-lang.org/stable/error-index.html#E0425): cannot find value `a` in this scope
 --> src/main.rs:4:27
  |
4 |     format!("\u{1f4a3}{a}");
  |                           ^ not found in this scope
```
This change reduces the number of skips generated for such escapes, so that spans following them point at the correct position.
Fixes https://github.com/rust-lang/rust-clippy/issues/9727
| -rw-r--r-- | compiler/rustc_parse_format/src/lib.rs | 47 | ||||
| -rw-r--r-- | src/test/ui/fmt/unicode-escape-spans.rs | 19 | ||||
| -rw-r--r-- | src/test/ui/fmt/unicode-escape-spans.stderr | 63 |
3 files changed, 111 insertions, 18 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 1394993abad..54bf4d1d6b7 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -819,19 +819,19 @@ fn find_skips_from_snippet( }; fn find_skips(snippet: &str, is_raw: bool) -> Vec<usize> { - let mut s = snippet.char_indices().peekable(); + let mut s = snippet.char_indices(); let mut skips = vec![]; while let Some((pos, c)) = s.next() { - match (c, s.peek()) { + match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' ('\\', Some((next_pos, '\n'))) if !is_raw => { skips.push(pos); - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); - while let Some((pos, c)) = s.peek() { + while let Some((pos, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(*pos); + skips.push(pos); let _ = s.next(); } else { break; @@ -839,7 +839,7 @@ fn find_skips_from_snippet( } } ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(*next_pos); + skips.push(next_pos); let _ = s.next(); } ('\\', Some((_, 'x'))) if !is_raw => { @@ -858,19 +858,30 @@ fn find_skips_from_snippet( } if let Some((next_pos, next_c)) = s.next() { if next_c == '{' { - skips.push(next_pos); - let mut i = 0; // consume up to 6 hexanumeric chars + closing `}` - while let (Some((next_pos, c)), true) = (s.next(), i < 7) { - if c.is_digit(16) { - skips.push(next_pos); - } else if c == '}' { - skips.push(next_pos); - break; - } else { - break; - } - i += 1; + // consume up to 6 hexanumeric chars + let digits_len = + s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count(); + + let len_utf8 = s + .as_str() + .get(..digits_len) + .and_then(|digits| u32::from_str_radix(digits, 16).ok()) + .and_then(char::from_u32) + .map_or(1, char::len_utf8); + + // Skip the digits, for chars that encode to more than 1 utf-8 byte + // exclude as many digits as it is greater than 1 byte + // + // So for a 3 
byte character, exclude 2 digits + let required_skips = + digits_len.saturating_sub(len_utf8.saturating_sub(1)); + + // skip '{' and '}' also + for pos in (next_pos..).take(required_skips + 2) { + skips.push(pos) } + + s.nth(digits_len); } else if next_c.is_digit(16) { skips.push(next_pos); // We suggest adding `{` and `}` when appropriate, accept it here as if diff --git a/src/test/ui/fmt/unicode-escape-spans.rs b/src/test/ui/fmt/unicode-escape-spans.rs new file mode 100644 index 00000000000..753d91ce58e --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.rs @@ -0,0 +1,19 @@ +fn main() { + // 1 byte in UTF-8 + format!("\u{000041}{a}"); //~ ERROR cannot find value + format!("\u{0041}{a}"); //~ ERROR cannot find value + format!("\u{41}{a}"); //~ ERROR cannot find value + format!("\u{0}{a}"); //~ ERROR cannot find value + + // 2 bytes + format!("\u{0df}{a}"); //~ ERROR cannot find value + format!("\u{df}{a}"); //~ ERROR cannot find value + + // 3 bytes + format!("\u{00211d}{a}"); //~ ERROR cannot find value + format!("\u{211d}{a}"); //~ ERROR cannot find value + + // 4 bytes + format!("\u{1f4a3}{a}"); //~ ERROR cannot find value + format!("\u{10ffff}{a}"); //~ ERROR cannot find value +} diff --git a/src/test/ui/fmt/unicode-escape-spans.stderr b/src/test/ui/fmt/unicode-escape-spans.stderr new file mode 100644 index 00000000000..1d8473f01b8 --- /dev/null +++ b/src/test/ui/fmt/unicode-escape-spans.stderr @@ -0,0 +1,63 @@ +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:3:25 + | +LL | format!("\u{000041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:4:23 + | +LL | format!("\u{0041}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:5:21 + | +LL | format!("\u{41}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> 
$DIR/unicode-escape-spans.rs:6:20 + | +LL | format!("\u{0}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:9:22 + | +LL | format!("\u{0df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:10:21 + | +LL | format!("\u{df}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:13:25 + | +LL | format!("\u{00211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:14:23 + | +LL | format!("\u{211d}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:17:24 + | +LL | format!("\u{1f4a3}{a}"); + | ^ not found in this scope + +error[E0425]: cannot find value `a` in this scope + --> $DIR/unicode-escape-spans.rs:18:25 + | +LL | format!("\u{10ffff}{a}"); + | ^ not found in this scope + +error: aborting due to 10 previous errors + +For more information about this error, try `rustc --explain E0425`. |
