From d1d111d09edcab2c87bc59ea69989d708e6bfa2d Mon Sep 17 00:00:00 2001 From: LaurenΘ›iu Nicola Date: Wed, 8 Nov 2023 08:15:03 +0200 Subject: Merge commit '3b7c7f97e4a7bb253a8d398ee4f8346f6cf2817b' into sync-from-ra --- crates/parser/Cargo.toml | 5 +- crates/parser/src/grammar/params.rs | 9 +- crates/parser/src/lexed_str.rs | 95 ++++++++++++++++++++++ crates/parser/src/lib.rs | 1 + crates/parser/src/shortcuts.rs | 36 ++++---- .../test_data/lexer/err/byte_char_literals.rast | 86 ++++++++++++++++++++ .../test_data/lexer/err/byte_char_literals.rs | 44 ++++++++++ .../parser/test_data/lexer/err/byte_strings.rast | 28 +++++++ crates/parser/test_data/lexer/err/byte_strings.rs | 14 ++++ crates/parser/test_data/lexer/err/c_strings.rast | 28 +++++++ crates/parser/test_data/lexer/err/c_strings.rs | 14 ++++ .../parser/test_data/lexer/err/char_literals.rast | 92 +++++++++++++++++++++ crates/parser/test_data/lexer/err/char_literals.rs | 47 +++++++++++ crates/parser/test_data/lexer/err/strings.rast | 28 +++++++ crates/parser/test_data/lexer/err/strings.rs | 14 ++++ crates/parser/test_data/lexer/ok/byte_strings.rast | 6 -- crates/parser/test_data/lexer/ok/byte_strings.rs | 6 +- crates/parser/test_data/lexer/ok/chars.rast | 2 - crates/parser/test_data/lexer/ok/chars.rs | 2 +- .../parser/inline/err/0023_empty_param_slot.rast | 41 ++++++++++ .../parser/inline/err/0023_empty_param_slot.rs | 1 + 21 files changed, 566 insertions(+), 33 deletions(-) create mode 100644 crates/parser/test_data/lexer/err/byte_char_literals.rast create mode 100644 crates/parser/test_data/lexer/err/byte_char_literals.rs create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rast create mode 100644 crates/parser/test_data/lexer/err/byte_strings.rs create mode 100644 crates/parser/test_data/lexer/err/c_strings.rast create mode 100644 crates/parser/test_data/lexer/err/c_strings.rs create mode 100644 crates/parser/test_data/lexer/err/char_literals.rast create mode 100644 crates/parser/test_data/lexer/err/char_literals.rs create mode 100644 crates/parser/test_data/lexer/err/strings.rast create mode 100644 crates/parser/test_data/lexer/err/strings.rs create mode 100644 crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rast create mode 100644 crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rs (limited to 'crates/parser') diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 09e62c35278..efb326323f9 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -13,7 +13,7 @@ doctest = false [dependencies] drop_bomb = "0.1.5" -rustc_lexer.workspace = true +rustc-dependencies.workspace = true limit.workspace = true @@ -22,3 +22,6 @@ expect-test = "1.4.0" stdx.workspace = true sourcegen.workspace = true + +[features] +in-rust-tree = ["rustc-dependencies/in-rust-tree"] diff --git a/crates/parser/src/grammar/params.rs b/crates/parser/src/grammar/params.rs index 74eae9151a2..846da28cb01 100644 --- a/crates/parser/src/grammar/params.rs +++ b/crates/parser/src/grammar/params.rs @@ -7,6 +7,9 @@ use super::*; // fn b(x: i32) {} // fn c(x: i32, ) {} // fn d(x: i32, y: ()) {} + +// test_err empty_param_slot +// fn f(y: i32, ,t: i32) {} pub(super) fn param_list_fn_def(p: &mut Parser<'_>) { list_(p, Flavor::FnDef); } @@ -71,7 +74,11 @@ fn list_(p: &mut Parser<'_>, flavor: Flavor) { if !p.at_ts(PARAM_FIRST.union(ATTRIBUTE_FIRST)) { p.error("expected value parameter"); m.abandon(p); - break; + if p.eat(T![,]) { + continue; + } else { + break; + } } param(p, m, flavor); if !p.at(T![,]) { diff --git a/crates/parser/src/lexed_str.rs b/crates/parser/src/lexed_str.rs index 36c52953a02..b9e7566fdf9 100644 --- a/crates/parser/src/lexed_str.rs +++ b/crates/parser/src/lexed_str.rs @@ -8,8 +8,12 @@ //! Note that these tokens, unlike the tokens we feed into the parser, do //! include info about comments and whitespace. +use rustc_dependencies::lexer as rustc_lexer; + use std::ops; +use rustc_lexer::unescape::{EscapeError, Mode}; + use crate::{ SyntaxKind::{self, *}, T, @@ -253,30 +257,60 @@ impl<'a> Converter<'a> { rustc_lexer::LiteralKind::Char { terminated } => { if !terminated { err = "Missing trailing `'` symbol to terminate the character literal"; + } else { + let text = &self.res.text[self.offset + 1..][..len - 1]; + let i = text.rfind('\'').unwrap(); + let text = &text[..i]; + if let Err(e) = rustc_lexer::unescape::unescape_char(text) { + err = error_to_diagnostic_message(e, Mode::Char); + } } CHAR } rustc_lexer::LiteralKind::Byte { terminated } => { if !terminated { err = "Missing trailing `'` symbol to terminate the byte literal"; + } else { + let text = &self.res.text[self.offset + 2..][..len - 2]; + let i = text.rfind('\'').unwrap(); + let text = &text[..i]; + if let Err(e) = rustc_lexer::unescape::unescape_byte(text) { + err = error_to_diagnostic_message(e, Mode::Byte); + } } + BYTE } rustc_lexer::LiteralKind::Str { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the string literal"; + } else { + let text = &self.res.text[self.offset + 1..][..len - 1]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + err = unescape_string_error_message(text, Mode::Str); } STRING } rustc_lexer::LiteralKind::ByteStr { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the byte string literal"; + } else { + let text = &self.res.text[self.offset + 2..][..len - 2]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + err = unescape_string_error_message(text, Mode::ByteStr); } BYTE_STRING } rustc_lexer::LiteralKind::CStr { terminated } => { if !terminated { err = "Missing trailing `\"` symbol to terminate the string literal"; + } else { + let text = &self.res.text[self.offset + 2..][..len - 2]; + let i = text.rfind('"').unwrap(); + let text = &text[..i]; + err = unescape_string_error_message(text, Mode::CStr); } C_STRING } @@ -304,3 +338,64 @@ impl<'a> Converter<'a> { self.push(syntax_kind, len, err); } } + +fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str { + match error { + EscapeError::ZeroChars => "empty character literal", + EscapeError::MoreThanOneChar => "character literal may only contain one codepoint", + EscapeError::LoneSlash => "", + EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => { + "unknown byte escape" + } + EscapeError::InvalidEscape => "unknown character escape", + EscapeError::BareCarriageReturn => "", + EscapeError::BareCarriageReturnInRawString => "", + EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped", + EscapeError::EscapeOnlyChar => "character constant must be escaped", + EscapeError::TooShortHexEscape => "numeric character escape is too short", + EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape", + EscapeError::OutOfRangeHexEscape => "out of range hex escape", + EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence", + EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape", + EscapeError::EmptyUnicodeEscape => "empty unicode escape", + EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape", + EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape", + EscapeError::OverlongUnicodeEscape => "overlong unicode escape", + EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape", + EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape", + EscapeError::UnicodeEscapeInByte => "unicode escape in byte string", + EscapeError::NonAsciiCharInByte if mode == Mode::Byte => { + "non-ASCII character in byte literal" + } + EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => { + "non-ASCII character in byte string literal" + } + EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal", + EscapeError::UnskippedWhitespaceWarning => "", + EscapeError::MultipleSkippedLinesWarning => "", + } +} + +fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str { + let mut error_message = ""; + match mode { + Mode::CStr => { + rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| { + if let Err(e) = res { + error_message = error_to_diagnostic_message(e, mode); + } + }); + } + Mode::ByteStr | Mode::Str => { + rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| { + if let Err(e) = res { + error_message = error_to_diagnostic_message(e, mode); + } + }); + } + _ => { + // Other Modes are not supported yet or do not apply + } + } + error_message +} diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index c155e8aaf67..fcfd1a50719 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -19,6 +19,7 @@ #![warn(rust_2018_idioms, unused_lifetimes, semicolon_in_expressions_from_macros)] #![allow(rustdoc::private_intra_doc_links)] +#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] mod lexed_str; mod token_set; diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 2c47e3d086d..57005a6834c 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -32,29 +32,27 @@ impl LexedStr<'_> { let kind = self.kind(i); if kind.is_trivia() { was_joint = false + } else if kind == SyntaxKind::IDENT { + let token_text = self.text(i); + let contextual_kw = + SyntaxKind::from_contextual_keyword(token_text).unwrap_or(SyntaxKind::IDENT); + res.push_ident(contextual_kw); } else { - if kind == SyntaxKind::IDENT { - let token_text = self.text(i); - let contextual_kw = SyntaxKind::from_contextual_keyword(token_text) - .unwrap_or(SyntaxKind::IDENT); - res.push_ident(contextual_kw); - } else { - if was_joint { + if was_joint { + res.was_joint(); + } + res.push(kind); + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access + if kind == SyntaxKind::FLOAT_NUMBER { + if !self.text(i).ends_with('.') { res.was_joint(); - } - res.push(kind); - // Tag the token as joint if it is float with a fractional part - // we use this jointness to inform the parser about what token split - // event to emit when we encounter a float literal in a field access - if kind == SyntaxKind::FLOAT_NUMBER { - if !self.text(i).ends_with('.') { - res.was_joint(); - } else { - was_joint = false; - } } else { - was_joint = true; + was_joint = false; } + } else { + was_joint = true; } } } diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rast b/crates/parser/test_data/lexer/err/byte_char_literals.rast new file mode 100644 index 00000000000..7603c9099da --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_char_literals.rast @@ -0,0 +1,86 @@ +BYTE "b''" error: empty character literal +WHITESPACE "\n" +BYTE "b'\\'" error: Missing trailing `'` symbol to terminate the byte literal +WHITESPACE "\n" +BYTE "b'\n'" error: byte constant must be escaped +WHITESPACE "\n" +BYTE "b'spam'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\x0ff'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\\"a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\na'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\ra'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\ta'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\\\a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\'a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\0a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\u{0}x'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{1F63b}}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\v'" error: unknown byte escape +WHITESPACE "\n" +BYTE "b'\\πŸ’©'" error: unknown byte escape +WHITESPACE "\n" +BYTE "b'\\●'" error: unknown byte escape +WHITESPACE "\n" +BYTE "b'\\\\\\r'" error: character literal may only contain one codepoint +WHITESPACE "\n" +BYTE "b'\\x'" error: numeric character escape is too short +WHITESPACE "\n" +BYTE "b'\\x0'" error: numeric character escape is too short +WHITESPACE "\n" +BYTE "b'\\xf'" error: numeric character escape is too short +WHITESPACE "\n" +BYTE "b'\\xa'" error: numeric character escape is too short +WHITESPACE "\n" +BYTE "b'\\xx'" error: invalid character in numeric character escape +WHITESPACE "\n" +BYTE "b'\\xΡ‹'" error: invalid character in numeric character escape +WHITESPACE "\n" +BYTE "b'\\xπŸ¦€'" error: invalid character in numeric character escape +WHITESPACE "\n" +BYTE "b'\\xtt'" error: invalid character in numeric character escape +WHITESPACE "\n" +BYTE "b'\\u'" error: incorrect unicode escape sequence +WHITESPACE "\n" +BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence +WHITESPACE "\n" +BYTE "b'\\u{0x}'" error: invalid character in unicode escape +WHITESPACE "\n" +BYTE "b'\\u{'" error: unterminated unicode escape +WHITESPACE "\n" +BYTE "b'\\u{0000'" error: unterminated unicode escape +WHITESPACE "\n" +BYTE "b'\\u{}'" error: empty unicode escape +WHITESPACE "\n" +BYTE "b'\\u{_0000}'" error: invalid start of unicode escape +WHITESPACE "\n" +BYTE "b'\\u{0000000}'" error: overlong unicode escape +WHITESPACE "\n" +BYTE "b'\\u{FFFFFF}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{ffffff}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{ffffff}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{DC00}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{DDDD}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{DFFF}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{D800}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{DAAA}'" error: unicode escape in byte string +WHITESPACE "\n" +BYTE "b'\\u{DBFF}'" error: unicode escape in byte string +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/byte_char_literals.rs b/crates/parser/test_data/lexer/err/byte_char_literals.rs new file mode 100644 index 00000000000..b2d06e490bd --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_char_literals.rs @@ -0,0 +1,44 @@ +b'' +b'\' +b' +' +b'spam' +b'\x0ff' +b'\"a' +b'\na' +b'\ra' +b'\ta' +b'\\a' +b'\'a' +b'\0a' +b'\u{0}x' +b'\u{1F63b}}' +b'\v' +b'\πŸ’©' +b'\●' +b'\\\r' +b'\x' +b'\x0' +b'\xf' +b'\xa' +b'\xx' +b'\xΡ‹' +b'\xπŸ¦€' +b'\xtt' +b'\u' +b'\u[0123]' +b'\u{0x}' +b'\u{' +b'\u{0000' +b'\u{}' +b'\u{_0000}' +b'\u{0000000}' +b'\u{FFFFFF}' +b'\u{ffffff}' +b'\u{ffffff}' +b'\u{DC00}' +b'\u{DDDD}' +b'\u{DFFF}' +b'\u{D800}' +b'\u{DAAA}' +b'\u{DBFF}' diff --git a/crates/parser/test_data/lexer/err/byte_strings.rast b/crates/parser/test_data/lexer/err/byte_strings.rast new file mode 100644 index 00000000000..e8d8ff8cefb --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rast @@ -0,0 +1,28 @@ +BYTE_STRING "b\"\\πŸ’©\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\●\"" error: unknown byte escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +BYTE_STRING "b\"\\u{FFFFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{ffffff}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DC00}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DDDD}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DFFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{D800}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DAAA}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\u{DBFF}\"" error: unicode escape in byte string +WHITESPACE "\n" +BYTE_STRING "b\"\\xΡ‹\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/byte_strings.rs b/crates/parser/test_data/lexer/err/byte_strings.rs new file mode 100644 index 00000000000..e74847137b1 --- /dev/null +++ b/crates/parser/test_data/lexer/err/byte_strings.rs @@ -0,0 +1,14 @@ +b"\πŸ’©" +b"\●" +b"\u{_0000}" +b"\u{0000000}" +b"\u{FFFFFF}" +b"\u{ffffff}" +b"\u{ffffff}" +b"\u{DC00}" +b"\u{DDDD}" +b"\u{DFFF}" +b"\u{D800}" +b"\u{DAAA}" +b"\u{DBFF}" +b"\xΡ‹" diff --git a/crates/parser/test_data/lexer/err/c_strings.rast b/crates/parser/test_data/lexer/err/c_strings.rast new file mode 100644 index 00000000000..1b4424ba5c7 --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rast @@ -0,0 +1,28 @@ +C_STRING "c\"\\πŸ’©\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\●\"" error: unknown character escape +WHITESPACE "\n" +C_STRING "c\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +C_STRING "c\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DC00}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +C_STRING "c\"\\xΡ‹\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/c_strings.rs b/crates/parser/test_data/lexer/err/c_strings.rs new file mode 100644 index 00000000000..1b78ffc28a0 --- /dev/null +++ b/crates/parser/test_data/lexer/err/c_strings.rs @@ -0,0 +1,14 @@ +c"\πŸ’©" +c"\●" +c"\u{_0000}" +c"\u{0000000}" +c"\u{FFFFFF}" +c"\u{ffffff}" +c"\u{ffffff}" +c"\u{DC00}" +c"\u{DDDD}" +c"\u{DFFF}" +c"\u{D800}" +c"\u{DAAA}" +c"\u{DBFF}" +c"\xΡ‹" diff --git a/crates/parser/test_data/lexer/err/char_literals.rast b/crates/parser/test_data/lexer/err/char_literals.rast new file mode 100644 index 00000000000..b1e1364d4c2 --- /dev/null +++ b/crates/parser/test_data/lexer/err/char_literals.rast @@ -0,0 +1,92 @@ +CHAR "'hello'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "''" error: empty character literal +WHITESPACE "\n" +CHAR "'\n'" error: character constant must be escaped +WHITESPACE "\n" +CHAR "'spam'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\x0ff'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\\"a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\na'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\ra'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\ta'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\\\a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\'a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\0a'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\u{0}x'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\u{1F63b}}'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\v'" error: unknown character escape +WHITESPACE "\n" +CHAR "'\\πŸ’©'" error: unknown character escape +WHITESPACE "\n" +CHAR "'\\●'" error: unknown character escape +WHITESPACE "\n" +CHAR "'\\\\\\r'" error: character literal may only contain one codepoint +WHITESPACE "\n" +CHAR "'\\x'" error: numeric character escape is too short +WHITESPACE "\n" +CHAR "'\\x0'" error: numeric character escape is too short +WHITESPACE "\n" +CHAR "'\\xf'" error: numeric character escape is too short +WHITESPACE "\n" +CHAR "'\\xa'" error: numeric character escape is too short +WHITESPACE "\n" +CHAR "'\\xx'" error: invalid character in numeric character escape +WHITESPACE "\n" +CHAR "'\\xΡ‹'" error: invalid character in numeric character escape +WHITESPACE "\n" +CHAR "'\\xπŸ¦€'" error: invalid character in numeric character escape +WHITESPACE "\n" +CHAR "'\\xtt'" error: invalid character in numeric character escape +WHITESPACE "\n" +CHAR "'\\xff'" error: out of range hex escape +WHITESPACE "\n" +CHAR "'\\xFF'" error: out of range hex escape +WHITESPACE "\n" +CHAR "'\\x80'" error: out of range hex escape +WHITESPACE "\n" +CHAR "'\\u'" error: incorrect unicode escape sequence +WHITESPACE "\n" +CHAR "'\\u[0123]'" error: incorrect unicode escape sequence +WHITESPACE "\n" +CHAR "'\\u{0x}'" error: invalid character in unicode escape +WHITESPACE "\n" +CHAR "'\\u{'" error: unterminated unicode escape +WHITESPACE "\n" +CHAR "'\\u{0000'" error: unterminated unicode escape +WHITESPACE "\n" +CHAR "'\\u{}'" error: empty unicode escape +WHITESPACE "\n" +CHAR "'\\u{_0000}'" error: invalid start of unicode escape +WHITESPACE "\n" +CHAR "'\\u{0000000}'" error: overlong unicode escape +WHITESPACE "\n" +CHAR "'\\u{FFFFFF}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{ffffff}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{ffffff}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{DC00}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{DDDD}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{DFFF}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{D800}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{DAAA}'" error: invalid unicode character escape +WHITESPACE "\n" +CHAR "'\\u{DBFF}'" error: invalid unicode character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/char_literals.rs b/crates/parser/test_data/lexer/err/char_literals.rs new file mode 100644 index 00000000000..291f99d8020 --- /dev/null +++ b/crates/parser/test_data/lexer/err/char_literals.rs @@ -0,0 +1,47 @@ +'hello' +'' +' +' +'spam' +'\x0ff' +'\"a' +'\na' +'\ra' +'\ta' +'\\a' +'\'a' +'\0a' +'\u{0}x' +'\u{1F63b}}' +'\v' +'\πŸ’©' +'\●' +'\\\r' +'\x' +'\x0' +'\xf' +'\xa' +'\xx' +'\xΡ‹' +'\xπŸ¦€' +'\xtt' +'\xff' +'\xFF' +'\x80' +'\u' +'\u[0123]' +'\u{0x}' +'\u{' +'\u{0000' +'\u{}' +'\u{_0000}' +'\u{0000000}' +'\u{FFFFFF}' +'\u{ffffff}' +'\u{ffffff}' +'\u{DC00}' +'\u{DDDD}' +'\u{DFFF}' +'\u{D800}' +'\u{DAAA}' +'\u{DBFF}' diff --git a/crates/parser/test_data/lexer/err/strings.rast b/crates/parser/test_data/lexer/err/strings.rast new file mode 100644 index 00000000000..0cd1747208e --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rast @@ -0,0 +1,28 @@ +STRING "\"\\πŸ’©\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\●\"" error: unknown character escape +WHITESPACE "\n" +STRING "\"\\u{_0000}\"" error: invalid start of unicode escape +WHITESPACE "\n" +STRING "\"\\u{0000000}\"" error: overlong unicode escape +WHITESPACE "\n" +STRING "\"\\u{FFFFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{ffffff}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DC00}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DDDD}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DFFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{D800}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DAAA}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\u{DBFF}\"" error: invalid unicode character escape +WHITESPACE "\n" +STRING "\"\\xΡ‹\"" error: invalid character in numeric character escape +WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/err/strings.rs b/crates/parser/test_data/lexer/err/strings.rs new file mode 100644 index 00000000000..2499516d3fa --- /dev/null +++ b/crates/parser/test_data/lexer/err/strings.rs @@ -0,0 +1,14 @@ +"\πŸ’©" +"\●" +"\u{_0000}" +"\u{0000000}" +"\u{FFFFFF}" +"\u{ffffff}" +"\u{ffffff}" +"\u{DC00}" +"\u{DDDD}" +"\u{DFFF}" +"\u{D800}" +"\u{DAAA}" +"\u{DBFF}" +"\xΡ‹" diff --git a/crates/parser/test_data/lexer/ok/byte_strings.rast b/crates/parser/test_data/lexer/ok/byte_strings.rast index c848ac368e4..fd20ca57ac6 100644 --- a/crates/parser/test_data/lexer/ok/byte_strings.rast +++ b/crates/parser/test_data/lexer/ok/byte_strings.rast @@ -1,13 +1,9 @@ -BYTE "b''" -WHITESPACE " " BYTE "b'x'" WHITESPACE " " BYTE_STRING "b\"foo\"" WHITESPACE " " BYTE_STRING "br\"\"" WHITESPACE "\n" -BYTE "b''suf" -WHITESPACE " " BYTE_STRING "b\"\"ix" WHITESPACE " " BYTE_STRING "br\"\"br" @@ -17,6 +13,4 @@ WHITESPACE " " BYTE "b'\\\\'" WHITESPACE " " BYTE "b'\\''" -WHITESPACE " " -BYTE "b'hello'" WHITESPACE "\n" diff --git a/crates/parser/test_data/lexer/ok/byte_strings.rs b/crates/parser/test_data/lexer/ok/byte_strings.rs index b54930f5e69..65460d02cb2 100644 --- a/crates/parser/test_data/lexer/ok/byte_strings.rs +++ b/crates/parser/test_data/lexer/ok/byte_strings.rs @@ -1,3 +1,3 @@ -b'' b'x' b"foo" br"" -b''suf b""ix br""br -b'\n' b'\\' b'\'' b'hello' +b'x' b"foo" br"" +b""ix br""br +b'\n' b'\\' b'\'' diff --git a/crates/parser/test_data/lexer/ok/chars.rast b/crates/parser/test_data/lexer/ok/chars.rast index 66e58cc298f..07172a4ecc0 100644 --- a/crates/parser/test_data/lexer/ok/chars.rast +++ b/crates/parser/test_data/lexer/ok/chars.rast @@ -4,8 +4,6 @@ CHAR "' '" WHITESPACE " " CHAR "'0'" WHITESPACE " " -CHAR "'hello'" -WHITESPACE " " CHAR "'\\x7f'" WHITESPACE " " CHAR "'\\n'" diff --git a/crates/parser/test_data/lexer/ok/chars.rs b/crates/parser/test_data/lexer/ok/chars.rs index 454ee0a5f61..15f52c113c1 100644 --- a/crates/parser/test_data/lexer/ok/chars.rs +++ b/crates/parser/test_data/lexer/ok/chars.rs @@ -1 +1 @@ -'x' ' ' '0' 'hello' '\x7f' '\n' '\\' '\'' +'x' ' ' '0' '\x7f' '\n' '\\' '\'' diff --git a/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rast b/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rast new file mode 100644 index 00000000000..39e35a81ee2 --- /dev/null +++ b/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rast @@ -0,0 +1,41 @@ +SOURCE_FILE + FN + FN_KW "fn" + WHITESPACE " " + NAME + IDENT "f" + PARAM_LIST + L_PAREN "(" + PARAM + IDENT_PAT + NAME + IDENT "y" + COLON ":" + WHITESPACE " " + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + IDENT "i32" + COMMA "," + WHITESPACE " " + COMMA "," + PARAM + IDENT_PAT + NAME + IDENT "t" + COLON ":" + WHITESPACE " " + PATH_TYPE + PATH + PATH_SEGMENT + NAME_REF + IDENT "i32" + R_PAREN ")" + WHITESPACE " " + BLOCK_EXPR + STMT_LIST + L_CURLY "{" + R_CURLY "}" + WHITESPACE "\n" +error 12: expected value parameter diff --git a/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rs b/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rs new file mode 100644 index 00000000000..0adf7b8d2f0 --- /dev/null +++ b/crates/parser/test_data/parser/inline/err/0023_empty_param_slot.rs @@ -0,0 +1 @@ +fn f(y: i32, ,t: i32) {} -- cgit 1.4.1-3-g733a5