Make non-ASCII errors more consistent.

There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated: `NonAsciiCharInByte`, used for byte literals and byte string literals; and `NonAsciiCharInByteString`, used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference.
author: Nicholas Nethercote <n.nethercote@gmail.com> 2022-11-03 15:17:37 +1100
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2022-11-04 14:23:40 +1100
commit: 7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438 (patch)
tree: 95b18ce5f8af6693f1484490d128a95b919fd38b /compiler/rustc_parse/src
parent: 34b32b0dac9da3fad7861bdc2bad89d771172bb3 (diff)
download: rust-7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438.tar.gz
rust-7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438.zip
1 files changed, 13 insertions, 19 deletions
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 055ee98a00a..6373f5b4fd6 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error(
                 .emit();
         }
         EscapeError::NonAsciiCharInByte => {
-            assert!(mode.is_byte());
             let (c, span) = last_char();
-            let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
+            let desc = match mode {
+                Mode::Byte => "byte literal",
+                Mode::ByteStr => "byte string literal",
+                Mode::RawByteStr => "raw byte string literal",
+                _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
+            };
+            let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
             let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
                 format!(" but is {:?}", c)
             } else {
                 String::new()
             };
-            err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
-            if (c as u32) <= 0xFF {
+            err.span_label(span, &format!("must be ASCII{}", postfix));
+            // Note: the \\xHH suggestions are not given for raw byte string
+            // literals, because they are raw and so cannot use any escapes.
+            if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
                 err.span_suggestion(
                     span,
                     &format!(
@@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error(
                     format!("\\x{:X}", c as u32),
                     Applicability::MaybeIncorrect,
                 );
-            } else if matches!(mode, Mode::Byte) {
+            } else if mode == Mode::Byte {
                 err.span_label(span, "this multibyte character does not fit into a single byte");
-            } else if matches!(mode, Mode::ByteStr) {
+            } else if mode != Mode::RawByteStr {
                 let mut utf8 = String::new();
                 utf8.push(c);
                 err.span_suggestion(
@@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error(
             }
             err.emit();
         }
-        EscapeError::NonAsciiCharInByteString => {
-            assert!(mode.is_byte());
-            let (c, span) = last_char();
-            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
-                format!(" but is {:?}", c)
-            } else {
-                String::new()
-            };
-            handler
-                .struct_span_err(span, "raw byte string must be ASCII")
-                .span_label(span, &format!("must be ASCII{}", postfix))
-                .emit();
-        }
         EscapeError::OutOfRangeHexEscape => {
             handler
                 .struct_span_err(span, "out of range hex escape")
author	Nicholas Nethercote <n.nethercote@gmail.com>	2022-11-03 15:17:37 +1100
committer	Nicholas Nethercote <n.nethercote@gmail.com>	2022-11-04 14:23:40 +1100
commit	7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438 (patch)
tree	95b18ce5f8af6693f1484490d128a95b919fd38b /compiler/rustc_parse/src
parent	34b32b0dac9da3fad7861bdc2bad89d771172bb3 (diff)
download	rust-7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438.tar.gz rust-7dbf2c0ed86a6fc97aa0b93bc2ac865d6f2cc438.zip