about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPol Valletbó <pol.valletbo@glovoapp.com>2023-10-11 15:25:52 +0200
committerPol Valletbó <pol.valletbo@glovoapp.com>2023-10-11 15:25:52 +0200
commite1aeb7fa794e228ca9099ac7679e8a1d0b22238a (patch)
tree2ee4568013650c49b3119fba5ab8d6466d6928a7
parent677e6f3439b91769a9b54c18afe5c136c14d9e8a (diff)
downloadrust-e1aeb7fa794e228ca9099ac7679e8a1d0b22238a.tar.gz
rust-e1aeb7fa794e228ca9099ac7679e8a1d0b22238a.zip
fix: handle errors for string byte string and c_string
-rw-r--r--crates/parser/src/lexed_str.rs42
-rw-r--r--crates/parser/test_data/lexer/err/byte_strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/byte_strings.rs14
-rw-r--r--crates/parser/test_data/lexer/err/c_strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/c_strings.rs14
-rw-r--r--crates/parser/test_data/lexer/err/strings.rast28
-rw-r--r--crates/parser/test_data/lexer/err/strings.rs14
7 files changed, 167 insertions, 1 deletions
diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs
index 84cedc1fa3f..c2e25daf37f 100644
--- a/crates/parser/src/lexed_str.rs
+++ b/crates/parser/src/lexed_str.rs
@@ -8,7 +8,10 @@
 //! Note that these tokens, unlike the tokens we feed into the parser, do
 //! include info about comments and whitespace.
 
-use rustc_dependencies::lexer as rustc_lexer;
+use rustc_dependencies::lexer::{
+    self as rustc_lexer,
+    unescape::{unescape_c_string, unescape_literal},
+};
 
 use std::ops;
 
@@ -284,18 +287,45 @@ impl<'a> Converter<'a> {
             rustc_lexer::LiteralKind::Str { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 1..][..len - 1];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_literal(text, Mode::Str, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::Str);
+                        }
+                    });
                 }
                 STRING
             }
             rustc_lexer::LiteralKind::ByteStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the byte string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_literal(text, Mode::ByteStr, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::ByteStr);
+                        }
+                    })
                 }
                 BYTE_STRING
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
                     err = "Missing trailing `\"` symbol to terminate the string literal";
+                } else {
+                    let text = &self.res.text[self.offset + 2..][..len - 2];
+                    let i = text.rfind('"').unwrap();
+                    let text = &text[..i];
+                    rustc_lexer::unescape::unescape_c_string(text, Mode::CStr, &mut |_, res| {
+                        if let Err(e) = res {
+                            err = error_to_diagnostic_message(e, Mode::CStr);
+                        }
+                    })
                 }
                 C_STRING
             }
@@ -360,3 +390,13 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::MultipleSkippedLinesWarning => "",
     }
 }
+
+fn fill_unescape_string_error(text: &str, mode: Mode, mut error_message: &str) {
+
+    rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
+        if let Err(e) = res {
+            error_message = error_to_diagnostic_message(e, mode);
+        }
+    });
+}
+
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast
new file mode 100644
index 00000000000..e8d8ff8cefb
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rast
@@ -0,0 +1,28 @@
+BYTE_STRING "b\"\\💩\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\●\"" error: unknown byte escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string
+WHITESPACE "\n"
+BYTE_STRING "b\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs
new file mode 100644
index 00000000000..e74847137b1
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/byte_strings.rs
@@ -0,0 +1,14 @@
+b"\💩"
+b"\●"
+b"\u{_0000}"
+b"\u{0000000}"
+b"\u{FFFFFF}"
+b"\u{ffffff}"
+b"\u{ffffff}"
+b"\u{DC00}"
+b"\u{DDDD}"
+b"\u{DFFF}"
+b"\u{D800}"
+b"\u{DAAA}"
+b"\u{DBFF}"
+b"\xы"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast
new file mode 100644
index 00000000000..1b4424ba5c7
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rast
@@ -0,0 +1,28 @@
+C_STRING "c\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+C_STRING "c\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs
new file mode 100644
index 00000000000..1b78ffc28a0
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/c_strings.rs
@@ -0,0 +1,14 @@
+c"\💩"
+c"\●"
+c"\u{_0000}"
+c"\u{0000000}"
+c"\u{FFFFFF}"
+c"\u{ffffff}"
+c"\u{ffffff}"
+c"\u{DC00}"
+c"\u{DDDD}"
+c"\u{DFFF}"
+c"\u{D800}"
+c"\u{DAAA}"
+c"\u{DBFF}"
+c"\xы"
diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast
new file mode 100644
index 00000000000..0cd1747208e
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rast
@@ -0,0 +1,28 @@
+STRING "\"\\💩\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\●\"" error: unknown character escape
+WHITESPACE "\n"
+STRING "\"\\u{_0000}\"" error: invalid start of unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{0000000}\"" error: overlong unicode escape
+WHITESPACE "\n"
+STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{ffffff}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DC00}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DDDD}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DFFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{D800}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DAAA}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\u{DBFF}\"" error: invalid unicode character escape
+WHITESPACE "\n"
+STRING "\"\\xы\"" error: invalid character in numeric character escape
+WHITESPACE "\n"
diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs
new file mode 100644
index 00000000000..2499516d3fa
--- /dev/null
+++ b/crates/parser/test_data/lexer/err/strings.rs
@@ -0,0 +1,14 @@
+"\💩"
+"\●"
+"\u{_0000}"
+"\u{0000000}"
+"\u{FFFFFF}"
+"\u{ffffff}"
+"\u{ffffff}"
+"\u{DC00}"
+"\u{DDDD}"
+"\u{DFFF}"
+"\u{D800}"
+"\u{DAAA}"
+"\u{DBFF}"
+"\xы"