diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 93 | ||||
| -rw-r--r-- | compiler/rustc_lint/messages.ftl | 3 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/context/diagnostics.rs | 3 | ||||
| -rw-r--r-- | compiler/rustc_lint/src/lints.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_lint_defs/src/builtin.rs | 41 | ||||
| -rw-r--r-- | compiler/rustc_lint_defs/src/lib.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_parse/messages.ftl | 4 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/errors.rs | 18 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 84 | ||||
| -rw-r--r-- | compiler/rustc_serialize/src/serialize.rs | 6 |
10 files changed, 249 insertions, 12 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 60aab668cba..b0ab50dd773 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -104,6 +104,12 @@ pub enum TokenKind { /// for emoji identifier recovery, as those are not meant to be ever accepted. InvalidPrefix, + /// Guarded string literal prefix: `#"` or `##`. + /// + /// Used for reserving "guarded strings" (RFC 3598) in edition 2024. + /// Split into the component tokens on older editions. + GuardedStrPrefix, + /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid /// suffix, but may be present here on string and float literals. Users of /// this type will need to check for and reject that case. @@ -191,30 +197,41 @@ pub enum DocStyle { /// `rustc_ast::ast::LitKind`). #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum LiteralKind { - /// "12_u8", "0o100", "0b120i99", "1f32". + /// `12_u8`, `0o100`, `0b120i99`, `1f32`. Int { base: Base, empty_int: bool }, - /// "12.34f32", "1e3", but not "1f32". + /// `12.34f32`, `1e3`, but not `1f32`. Float { base: Base, empty_exponent: bool }, - /// "'a'", "'\\'", "'''", "';" + /// `'a'`, `'\\'`, `'''`, `';` Char { terminated: bool }, - /// "b'a'", "b'\\'", "b'''", "b';" + /// `b'a'`, `b'\\'`, `b'''`, `b';` Byte { terminated: bool }, - /// ""abc"", ""abc" + /// `"abc"`, `"abc` Str { terminated: bool }, - /// "b"abc"", "b"abc" + /// `b"abc"`, `b"abc` ByteStr { terminated: bool }, /// `c"abc"`, `c"abc` CStr { terminated: bool }, - /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates + /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates /// an invalid literal. RawStr { n_hashes: Option<u8> }, - /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` + /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option<u8> }, /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal. RawCStr { n_hashes: Option<u8> }, } +/// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated). +/// +/// Can capture fewer closing hashes than starting hashes, +/// for more efficient lexing and better backwards diagnostics. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct GuardedStr { + pub n_hashes: u32, + pub terminated: bool, + pub token_len: u32, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum RawStrError { /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##` @@ -403,6 +420,12 @@ impl Cursor<'_> { TokenKind::Literal { kind: literal_kind, suffix_start } } + // Guarded string literal prefix: `#"` or `##` + '#' if matches!(self.first(), '"' | '#') => { + self.bump(); + TokenKind::GuardedStrPrefix + } + // One-symbol tokens. ';' => Semi, ',' => Comma, @@ -780,6 +803,60 @@ impl Cursor<'_> { false } + /// Attempt to lex for a guarded string literal. + /// + /// Used by `rustc_parse::lexer` to lex for guarded strings + /// conditionally based on edition. + /// + /// Note: this will not reset the `Cursor` when a + /// guarded string is not found. It is the caller's + /// responsibility to do so. + pub fn guarded_double_quoted_string(&mut self) -> Option<GuardedStr> { + debug_assert!(self.prev() != '#'); + + let mut n_start_hashes: u32 = 0; + while self.first() == '#' { + n_start_hashes += 1; + self.bump(); + } + + if self.first() != '"' { + return None; + } + self.bump(); + debug_assert!(self.prev() == '"'); + + // Lex the string itself as a normal string literal + // so we can recover that for older editions later. + let terminated = self.double_quoted_string(); + if !terminated { + let token_len = self.pos_within_token(); + self.reset_pos_within_token(); + + return Some(GuardedStr { n_hashes: n_start_hashes, terminated: false, token_len }); + } + + // Consume closing '#' symbols. + // Note that this will not consume extra trailing `#` characters: + // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }` + // followed by a `#` token. + let mut n_end_hashes = 0; + while self.first() == '#' && n_end_hashes < n_start_hashes { + n_end_hashes += 1; + self.bump(); + } + + // Reserved syntax, always an error, so it doesn't matter if + // `n_start_hashes != n_end_hashes`. + + self.eat_literal_suffix(); + + let token_len = self.pos_within_token(); + self.reset_pos_within_token(); + + Some(GuardedStr { n_hashes: n_start_hashes, terminated: true, token_len }) + } + /// Eats the double-quoted string and returns `n_hashes` and an error if encountered. fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> { // Wrap the actual function to handle the error with too many hashes. diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index d3799594871..e733e92c7cb 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -740,6 +740,9 @@ lint_reserved_prefix = prefix `{$prefix}` is unknown .label = unknown prefix .suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021 +lint_reserved_string = will be parsed as a guarded string in Rust 2024 + .suggestion = insert whitespace here to avoid this being parsed as a guarded string in Rust 2024 + lint_shadowed_into_iter = this method call resolves to `<&{$target} as IntoIterator>::into_iter` (due to backwards compatibility), but will resolve to `<{$target} as IntoIterator>::into_iter` in Rust {$edition} .use_iter_suggestion = use `.iter()` instead of `.into_iter()` to avoid ambiguity diff --git a/compiler/rustc_lint/src/context/diagnostics.rs b/compiler/rustc_lint/src/context/diagnostics.rs index b5ab56912cb..565c3c04252 100644 --- a/compiler/rustc_lint/src/context/diagnostics.rs +++ b/compiler/rustc_lint/src/context/diagnostics.rs @@ -176,6 +176,9 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: & lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() } .decorate_lint(diag); } + BuiltinLintDiag::ReservedString(suggestion) => { + lints::ReservedString { suggestion }.decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index a861894a444..87afeca0b28 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -3053,3 +3053,10 @@ pub(crate) enum MutRefSugg { #[derive(LintDiagnostic)] #[diag(lint_unqualified_local_imports)] pub(crate) struct UnqualifiedLocalImportsDiag {} + +#[derive(LintDiagnostic)] +#[diag(lint_reserved_string)] +pub(crate) struct ReservedString { + #[suggestion(code = " ", applicability = "machine-applicable")] + pub suggestion: Span, +} diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index a123059df8f..827791c54be 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -92,6 +92,7 @@ declare_lint_pass! { RUST_2021_INCOMPATIBLE_OR_PATTERNS, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2021_PRELUDE_COLLISIONS, + RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, RUST_2024_INCOMPATIBLE_PAT, RUST_2024_PRELUDE_COLLISIONS, SELF_CONSTRUCTOR_FROM_OUTER_ITEM, @@ -4996,3 +4997,43 @@ declare_lint! { Warn, "detects pointer to integer transmutes in const functions and associated constants", } + +declare_lint! { + /// The `rust_2024_guarded_string_incompatible_syntax` lint detects `#` tokens + /// that will be parsed as part of a guarded string literal in Rust 2024. + /// + /// ### Example + /// + /// ```rust,edition2021,compile_fail + /// #![deny(rust_2024_guarded_string_incompatible_syntax)] + /// + /// macro_rules! m { + /// (# $x:expr #) => (); + /// (# $x:expr) => (); + /// } + /// + /// m!(#"hey"#); + /// m!(#"hello"); + /// ``` + /// + /// {{produces}} + /// + /// ### Explanation + /// + /// Prior to Rust 2024, `#"hey"#` is three tokens: the first `#` + /// followed by the string literal `"hey"` then the final `#`. + /// In Rust 2024, the whole sequence is considered a single token. + /// + /// This lint suggests to add whitespace between the leading `#` + /// and the string to keep them separated in Rust 2024. + // Allow this lint -- rustdoc doesn't yet support threading edition into this lint's parser. + #[allow(rustdoc::invalid_rust_codeblocks)] + pub RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, + Allow, + "will be parsed as a guarded string in Rust 2024", + @future_incompatible = FutureIncompatibleInfo { + reason: FutureIncompatibilityReason::EditionError(Edition::Edition2024), + reference: "issue #123735 <https://github.com/rust-lang/rust/issues/123735>", + }; + crate_level_only +} diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index 386918a5c41..c01fa5c54d6 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -614,6 +614,8 @@ pub enum BuiltinLintDiag { ReservedPrefix(Span, String), /// `'r#` in edition < 2021. RawPrefix(Span), + /// `##` or `#"` is edition < 2024. + ReservedString(Span), TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index 948199fd55c..ba5e2ddf4fc 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -706,6 +706,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow .label = the label .suggestion = add `:` after the label +parse_reserved_string = invalid string literal + .note = unprefixed guarded string literals are reserved for future use since Rust 2024 + .suggestion_whitespace = consider inserting whitespace here + parse_return_types_use_thin_arrow = return types are denoted using `->` .suggestion = use `->` instead diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index dade3912751..124975f67f1 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -2111,6 +2111,24 @@ pub(crate) enum UnknownPrefixSugg { } #[derive(Diagnostic)] +#[diag(parse_reserved_string)] +#[note] +pub(crate) struct ReservedString { + #[primary_span] + pub span: Span, + #[subdiagnostic] + pub sugg: Option<GuardedStringSugg>, +} +#[derive(Subdiagnostic)] +#[suggestion( + parse_suggestion_whitespace, + code = " ", + applicability = "maybe-incorrect", + style = "verbose" +)] +pub(crate) struct GuardedStringSugg(#[primary_span] pub Span); + +#[derive(Diagnostic)] #[diag(parse_too_many_hashes)] pub(crate) struct TooManyHashes { #[primary_span] diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 3e46fc93fa4..d627ef3d2cb 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -10,7 +10,8 @@ use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ - RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; use rustc_session::parse::ParseSess; use rustc_span::symbol::Symbol; @@ -251,6 +252,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let prefix_span = self.mk_sp(start, lit_start); return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace); } + rustc_lexer::TokenKind::GuardedStrPrefix => self.maybe_report_guarded_str(start, str_before), rustc_lexer::TokenKind::Literal { kind, suffix_start } => { let suffix_start = start + BytePos(suffix_start); let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); @@ -781,6 +783,86 @@ impl<'psess, 'src> StringReader<'psess, 'src> { } } + /// Detect guarded string literal syntax + /// + /// RFC 3598 reserved this syntax for future use. As of Rust 2024, + /// using this syntax produces an error. In earlier editions, however, it + /// only results in an (allowed by default) lint, and is treated as + /// separate tokens. + fn maybe_report_guarded_str(&mut self, start: BytePos, str_before: &'src str) -> TokenKind { + let span = self.mk_sp(start, self.pos); + let edition2024 = span.edition().at_least_rust_2024(); + + let space_pos = start + BytePos(1); + let space_span = self.mk_sp(space_pos, space_pos); + + let mut cursor = Cursor::new(str_before); + + let (span, unterminated) = match cursor.guarded_double_quoted_string() { + Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => { + let end = start + BytePos(token_len); + let span = self.mk_sp(start, end); + let str_start = start + BytePos(n_hashes); + + if edition2024 { + self.cursor = cursor; + self.pos = end; + } + + let unterminated = if terminated { None } else { Some(str_start) }; + + (span, unterminated) + } + _ => { + // We should only get here in the `##+` case. + debug_assert_eq!(self.str_from_to(start, start + BytePos(2)), "##"); + + (span, None) + } + }; + if edition2024 { + if let Some(str_start) = unterminated { + // Only a fatal error if string is unterminated. + self.dcx() + .struct_span_fatal( + self.mk_sp(str_start, self.pos), + "unterminated double quote string", + ) + .with_code(E0765) + .emit() + } + + let sugg = if span.from_expansion() { + None + } else { + Some(errors::GuardedStringSugg(space_span)) + }; + + // In Edition 2024 and later, emit a hard error. + let err = self.dcx().emit_err(errors::ReservedString { span, sugg }); + + token::Literal(token::Lit { + kind: token::Err(err), + symbol: self.symbol_from_to(start, self.pos), + suffix: None, + }) + } else { + // Before Rust 2024, only emit a lint for migration. + self.psess.buffer_lint( + RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, + span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::ReservedString(space_span), + ); + + // For backwards compatibility, roll back to after just the first `#` + // and return the `Pound` token. + self.pos = start + BytePos(1); + self.cursor = Cursor::new(&str_before[1..]); + token::Pound + } + } + fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! { self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num }); } diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index 2d37f5bdab8..bae5f259fcc 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -325,7 +325,7 @@ impl<D: Decoder, const N: usize> Decodable<D> for [u8; N] { } } -impl<'a, S: Encoder, T: Encodable<S>> Encodable<S> for Cow<'a, [T]> +impl<S: Encoder, T: Encodable<S>> Encodable<S> for Cow<'_, [T]> where [T]: ToOwned<Owned = Vec<T>>, { @@ -345,14 +345,14 @@ where } } -impl<'a, S: Encoder> Encodable<S> for Cow<'a, str> { +impl<S: Encoder> Encodable<S> for Cow<'_, str> { fn encode(&self, s: &mut S) { let val: &str = self; val.encode(s) } } -impl<'a, D: Decoder> Decodable<D> for Cow<'a, str> { +impl<D: Decoder> Decodable<D> for Cow<'_, str> { fn decode(d: &mut D) -> Cow<'static, str> { let v: String = Decodable::decode(d); Cow::Owned(v) |
