about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2023-10-16 18:23:22 +0000
committer bors <bors@rust-lang.org> 2023-10-16 18:23:22 +0000
commit d6afb4fa239fe7b5b34e5cefa9e58148fdff65b8 (patch)
tree 71a70eccd3ae8c21b8e5fb147b5b2097abfcfc58
parent 2910dbf12d78bbcdbde8ec5d83bb07947c8352ea (diff)
parent 6845c80a2fd52e2d8c58bda0e55c39c4bb836ad1 (diff)
download rust-d6afb4fa239fe7b5b34e5cefa9e58148fdff65b8.tar.gz
rust-d6afb4fa239fe7b5b34e5cefa9e58148fdff65b8.zip
Auto merge of #15746 - pvalletbo:string-literals-diagnose, r=Veykril
String literals diagnose

Continues the work from #15744 by adding diagnostic errors for the Str, ByteStr, and CStr literal kinds.

Also replaces `unescape_char` with `unescape_byte` so that Byte literals are checked with the correct method.
-rw-r--r-- crates/parser/src/lexed_str.rs 41
-rw-r--r-- crates/parser/test_data/lexer/err/byte_char_literals.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/byte_char_literals.rs 3
-rw-r--r-- crates/parser/test_data/lexer/err/byte_strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/byte_strings.rs 14
-rw-r--r-- crates/parser/test_data/lexer/err/c_strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/c_strings.rs 14
-rw-r--r-- crates/parser/test_data/lexer/err/strings.rast 28
-rw-r--r-- crates/parser/test_data/lexer/err/strings.rs 14
9 files changed, 177 insertions, 21 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 8e8bdce1eef..b9e7566fdf9 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -274,7 +274,7 @@ impl<'a> Converter<'a> {
                     let text = &self.res.text[self.offset + 2..][..len - 2];
                     let i = text.rfind('\'').unwrap();
                     let text = &text[..i];
-                    if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
+                    if let Err(e) = rustc_lexer::unescape::unescape_byte(text) {
                         err = error_to_diagnostic_message(e, Mode::Byte);
                     }
                 }
@@ -284,18 +284,33 @@ impl<'a> Converter<'a> {
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::Str);
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::ByteStr);
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    err = unescape_string_error_message(text, Mode::CStr);
                 }
                 C_STRING
             }
@@ -360,3 +375,27 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::MultipleSkippedLinesWarning => "",
     }
 }
+
+fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
+    let mut error_message = "";
+    match mode {
+        Mode::CStr => {
+            rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        Mode::ByteStr | Mode::Str => {
+            rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
+                if let Err(e) = res {
+                    error_message = error_to_diagnostic_message(e, mode);
+                }
+            });
+        }
+        _ => {
+            // Other Modes are not supported yet or do not apply
+        }
+    }
+    error_message
+}
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast
index 24892bc2394..7603c9099da 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rast
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast
@@ -22,9 +22,9 @@ BYTE "b'\\'a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
 BYTE "b'\\0a'" error: character literal may only contain one codepoint
 WHITESPACE "\n"
-BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
+BYTE "b'\\u{0}x'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
+BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string
 WHITESPACE "\n"
 BYTE "b'\\v'" error: unknown byte escape
 WHITESPACE "\n"
@@ -50,12 +50,6 @@ BYTE "b'\\x🦀'" error: invalid character in numeric character escape
 WHITESPACE "\n"
 BYTE "b'\\xtt'" error: invalid character in numeric character escape
 WHITESPACE "\n"
-BYTE "b'\\xff'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\xFF'" error: out of range hex escape
-WHITESPACE "\n"
-BYTE "b'\\x80'" error: out of range hex escape
-WHITESPACE "\n"
 BYTE "b'\\u'" error: incorrect unicode escape sequence
 WHITESPACE "\n"
 BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
@@ -72,21 +66,21 @@ BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
 WHITESPACE "\n"
 BYTE "b'\\u{0000000}'" error: overlong unicode escape
 WHITESPACE "\n"
-BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
+BYTE "b'\\u{ffffff}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DC00}'" error: invalid unicode character escape
+BYTE "b'\\u{DC00}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
+BYTE "b'\\u{DDDD}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DFFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{D800}'" error: invalid unicode character escape
+BYTE "b'\\u{D800}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
+BYTE "b'\\u{DAAA}'" error: unicode escape in byte string
 WHITESPACE "\n"
-BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
+BYTE "b'\\u{DBFF}'" error: unicode escape in byte string
 WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs
index 9f2f4309e76..b2d06e490bd 100644
--- a/crates/parser/test_data/lexer/err/byte_char_literals.rs
+++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs
@@ -25,9 +25,6 @@ b'\xx'
 b'\xы'
 b'\x🦀'
 b'\xtt'
-b'\xff'
-b'\xFF'
-b'\x80'
 b'\u'
 b'\u[0123]'
 b'\u{0x}'
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast
new file mode 100644
index 00000000000..e8d8ff8cefb
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rast
@@ -0,0 +1,28 @@
+BYTE_STRING "b\"\\💩\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\●\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs
new file mode 100644
index 00000000000..e74847137b1
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rs
@@ -0,0 +1,14 @@
+b"\💩"
+b"\●"
+b"\u{_0000}"
+b"\u{0000000}"
+b"\u{FFFFFF}"
+b"\u{ffffff}"
+b"\u{ffffff}"
+b"\u{DC00}"
+b"\u{DDDD}"
+b"\u{DFFF}"
+b"\u{D800}"
+b"\u{DAAA}"
+b"\u{DBFF}"
+b"\xы"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast
new file mode 100644
index 00000000000..1b4424ba5c7
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rast
@@ -0,0 +1,28 @@
+C_STRING "c\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs
new file mode 100644
index 00000000000..1b78ffc28a0
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rs
@@ -0,0 +1,14 @@
+c"\💩"
+c"\●"
+c"\u{_0000}"
+c"\u{0000000}"
+c"\u{FFFFFF}"
+c"\u{ffffff}"
+c"\u{ffffff}"
+c"\u{DC00}"
+c"\u{DDDD}"
+c"\u{DFFF}"
+c"\u{D800}"
+c"\u{DAAA}"
+c"\u{DBFF}"
+c"\xы"
diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast
new file mode 100644
index 00000000000..0cd1747208e
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rast
@@ -0,0 +1,28 @@
+STRING "\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs
new file mode 100644
index 00000000000..2499516d3fa
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rs
@@ -0,0 +1,14 @@
+"\💩"
+"\●"
+"\u{_0000}"
+"\u{0000000}"
+"\u{FFFFFF}"
+"\u{ffffff}"
+"\u{ffffff}"
+"\u{DC00}"
+"\u{DDDD}"
+"\u{DFFF}"
+"\u{D800}"
+"\u{DAAA}"
+"\u{DBFF}"
+"\xы"