about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/rustc_lexer/src/lib.rs93
-rw-r--r--compiler/rustc_lint/messages.ftl3
-rw-r--r--compiler/rustc_lint/src/context/diagnostics.rs3
-rw-r--r--compiler/rustc_lint/src/lints.rs7
-rw-r--r--compiler/rustc_lint_defs/src/builtin.rs41
-rw-r--r--compiler/rustc_lint_defs/src/lib.rs2
-rw-r--r--compiler/rustc_parse/messages.ftl4
-rw-r--r--compiler/rustc_parse/src/errors.rs18
-rw-r--r--compiler/rustc_parse/src/lexer/mod.rs84
-rw-r--r--compiler/rustc_serialize/src/serialize.rs6
10 files changed, 249 insertions, 12 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 60aab668cba..b0ab50dd773 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -104,6 +104,12 @@ pub enum TokenKind {
     /// for emoji identifier recovery, as those are not meant to be ever accepted.
     InvalidPrefix,
 
+    /// Guarded string literal prefix: `#"` or `##`.
+    ///
+    /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
+    /// Split into the component tokens on older editions.
+    GuardedStrPrefix,
+
     /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
     /// suffix, but may be present here on string and float literals. Users of
     /// this type will need to check for and reject that case.
@@ -191,30 +197,41 @@ pub enum DocStyle {
 /// `rustc_ast::ast::LitKind`).
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum LiteralKind {
-    /// "12_u8", "0o100", "0b120i99", "1f32".
+    /// `12_u8`, `0o100`, `0b120i99`, `1f32`.
     Int { base: Base, empty_int: bool },
-    /// "12.34f32", "1e3", but not "1f32".
+    /// `12.34f32`, `1e3`, but not `1f32`.
     Float { base: Base, empty_exponent: bool },
-    /// "'a'", "'\\'", "'''", "';"
+    /// `'a'`, `'\\'`, `'''`, `';`
     Char { terminated: bool },
-    /// "b'a'", "b'\\'", "b'''", "b';"
+    /// `b'a'`, `b'\\'`, `b'''`, `b';`
     Byte { terminated: bool },
-    /// ""abc"", ""abc"
+    /// `"abc"`, `"abc`
     Str { terminated: bool },
-    /// "b"abc"", "b"abc"
+    /// `b"abc"`, `b"abc`
     ByteStr { terminated: bool },
     /// `c"abc"`, `c"abc`
     CStr { terminated: bool },
-    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
+    /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
     /// an invalid literal.
     RawStr { n_hashes: Option<u8> },
-    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
+    /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
     /// indicates an invalid literal.
     RawByteStr { n_hashes: Option<u8> },
     /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
     RawCStr { n_hashes: Option<u8> },
 }
 
+/// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated).
+///
+/// Can capture fewer closing hashes than starting hashes,
+/// for more efficient lexing and better backwards diagnostics.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct GuardedStr {
+    pub n_hashes: u32,
+    pub terminated: bool,
+    pub token_len: u32,
+}
+
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum RawStrError {
     /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
@@ -403,6 +420,12 @@ impl Cursor<'_> {
                 TokenKind::Literal { kind: literal_kind, suffix_start }
             }
 
+            // Guarded string literal prefix: `#"` or `##`
+            '#' if matches!(self.first(), '"' | '#') => {
+                self.bump();
+                TokenKind::GuardedStrPrefix
+            }
+
             // One-symbol tokens.
             ';' => Semi,
             ',' => Comma,
@@ -780,6 +803,60 @@ impl Cursor<'_> {
         false
     }
 
+    /// Attempt to lex for a guarded string literal.
+    ///
+    /// Used by `rustc_parse::lexer` to lex for guarded strings
+    /// conditionally based on edition.
+    ///
+    /// Note: this will not reset the `Cursor` when a
+    /// guarded string is not found. It is the caller's
+    /// responsibility to do so.
+    pub fn guarded_double_quoted_string(&mut self) -> Option<GuardedStr> {
+        debug_assert!(self.prev() != '#');
+
+        let mut n_start_hashes: u32 = 0;
+        while self.first() == '#' {
+            n_start_hashes += 1;
+            self.bump();
+        }
+
+        if self.first() != '"' {
+            return None;
+        }
+        self.bump();
+        debug_assert!(self.prev() == '"');
+
+        // Lex the string itself as a normal string literal
+        // so we can recover that for older editions later.
+        let terminated = self.double_quoted_string();
+        if !terminated {
+            let token_len = self.pos_within_token();
+            self.reset_pos_within_token();
+
+            return Some(GuardedStr { n_hashes: n_start_hashes, terminated: false, token_len });
+        }
+
+        // Consume closing '#' symbols.
+        // Note that this will not consume extra trailing `#` characters:
+        // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
+        // followed by a `#` token.
+        let mut n_end_hashes = 0;
+        while self.first() == '#' && n_end_hashes < n_start_hashes {
+            n_end_hashes += 1;
+            self.bump();
+        }
+
+        // Reserved syntax, always an error, so it doesn't matter if
+        // `n_start_hashes != n_end_hashes`.
+
+        self.eat_literal_suffix();
+
+        let token_len = self.pos_within_token();
+        self.reset_pos_within_token();
+
+        Some(GuardedStr { n_hashes: n_start_hashes, terminated: true, token_len })
+    }
+
     /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
     fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
         // Wrap the actual function to handle the error with too many hashes.
diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl
index d3799594871..e733e92c7cb 100644
--- a/compiler/rustc_lint/messages.ftl
+++ b/compiler/rustc_lint/messages.ftl
@@ -740,6 +740,9 @@ lint_reserved_prefix = prefix `{$prefix}` is unknown
     .label = unknown prefix
     .suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021
 
+lint_reserved_string = will be parsed as a guarded string in Rust 2024
+    .suggestion = insert whitespace here to avoid this being parsed as a guarded string in Rust 2024
+
 lint_shadowed_into_iter =
     this method call resolves to `<&{$target} as IntoIterator>::into_iter` (due to backwards compatibility), but will resolve to `<{$target} as IntoIterator>::into_iter` in Rust {$edition}
     .use_iter_suggestion = use `.iter()` instead of `.into_iter()` to avoid ambiguity
diff --git a/compiler/rustc_lint/src/context/diagnostics.rs b/compiler/rustc_lint/src/context/diagnostics.rs
index b5ab56912cb..565c3c04252 100644
--- a/compiler/rustc_lint/src/context/diagnostics.rs
+++ b/compiler/rustc_lint/src/context/diagnostics.rs
@@ -176,6 +176,9 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: &
             lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() }
                 .decorate_lint(diag);
         }
+        BuiltinLintDiag::ReservedString(suggestion) => {
+            lints::ReservedString { suggestion }.decorate_lint(diag);
+        }
         BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
             lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag);
         }
diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs
index a861894a444..87afeca0b28 100644
--- a/compiler/rustc_lint/src/lints.rs
+++ b/compiler/rustc_lint/src/lints.rs
@@ -3053,3 +3053,10 @@ pub(crate) enum MutRefSugg {
 #[derive(LintDiagnostic)]
 #[diag(lint_unqualified_local_imports)]
 pub(crate) struct UnqualifiedLocalImportsDiag {}
+
+#[derive(LintDiagnostic)]
+#[diag(lint_reserved_string)]
+pub(crate) struct ReservedString {
+    #[suggestion(code = " ", applicability = "machine-applicable")]
+    pub suggestion: Span,
+}
diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs
index a123059df8f..827791c54be 100644
--- a/compiler/rustc_lint_defs/src/builtin.rs
+++ b/compiler/rustc_lint_defs/src/builtin.rs
@@ -92,6 +92,7 @@ declare_lint_pass! {
         RUST_2021_INCOMPATIBLE_OR_PATTERNS,
         RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
         RUST_2021_PRELUDE_COLLISIONS,
+        RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
         RUST_2024_INCOMPATIBLE_PAT,
         RUST_2024_PRELUDE_COLLISIONS,
         SELF_CONSTRUCTOR_FROM_OUTER_ITEM,
@@ -4996,3 +4997,43 @@ declare_lint! {
     Warn,
     "detects pointer to integer transmutes in const functions and associated constants",
 }
+
+declare_lint! {
+    /// The `rust_2024_guarded_string_incompatible_syntax` lint detects `#` tokens
+    /// that will be parsed as part of a guarded string literal in Rust 2024.
+    ///
+    /// ### Example
+    ///
+    /// ```rust,edition2021,compile_fail
+    /// #![deny(rust_2024_guarded_string_incompatible_syntax)]
+    ///
+    /// macro_rules! m {
+    ///     (# $x:expr #) => ();
+    ///     (# $x:expr) => ();
+    /// }
+    ///
+    /// m!(#"hey"#);
+    /// m!(#"hello");
+    /// ```
+    ///
+    /// {{produces}}
+    ///
+    /// ### Explanation
+    ///
+    /// Prior to Rust 2024, `#"hey"#` is three tokens: the first `#`
+    /// followed by the string literal `"hey"` then the final `#`.
+    /// In Rust 2024, the whole sequence is considered a single token.
+    ///
+    /// This lint suggests to add whitespace between the leading `#`
+    /// and the string to keep them separated in Rust 2024.
+    // Allow this lint -- rustdoc doesn't yet support threading edition into this lint's parser.
+    #[allow(rustdoc::invalid_rust_codeblocks)]
+    pub RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
+    Allow,
+    "will be parsed as a guarded string in Rust 2024",
+    @future_incompatible = FutureIncompatibleInfo {
+        reason: FutureIncompatibilityReason::EditionError(Edition::Edition2024),
+        reference: "issue #123735 <https://github.com/rust-lang/rust/issues/123735>",
+    };
+    crate_level_only
+}
diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs
index 386918a5c41..c01fa5c54d6 100644
--- a/compiler/rustc_lint_defs/src/lib.rs
+++ b/compiler/rustc_lint_defs/src/lib.rs
@@ -614,6 +614,8 @@ pub enum BuiltinLintDiag {
     ReservedPrefix(Span, String),
     /// `'r#` in edition < 2021.
     RawPrefix(Span),
+    /// `##` or `#"` is edition < 2024.
+    ReservedString(Span),
     TrailingMacro(bool, Ident),
     BreakWithLabelAndLoop(Span),
     UnicodeTextFlow(Span, String),
diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl
index 948199fd55c..ba5e2ddf4fc 100644
--- a/compiler/rustc_parse/messages.ftl
+++ b/compiler/rustc_parse/messages.ftl
@@ -706,6 +706,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
     .label = the label
     .suggestion = add `:` after the label
 
+parse_reserved_string = invalid string literal
+    .note = unprefixed guarded string literals are reserved for future use since Rust 2024
+    .suggestion_whitespace = consider inserting whitespace here
+
 parse_return_types_use_thin_arrow = return types are denoted using `->`
     .suggestion = use `->` instead
 
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs
index dade3912751..124975f67f1 100644
--- a/compiler/rustc_parse/src/errors.rs
+++ b/compiler/rustc_parse/src/errors.rs
@@ -2111,6 +2111,24 @@ pub(crate) enum UnknownPrefixSugg {
 }
 
 #[derive(Diagnostic)]
+#[diag(parse_reserved_string)]
+#[note]
+pub(crate) struct ReservedString {
+    #[primary_span]
+    pub span: Span,
+    #[subdiagnostic]
+    pub sugg: Option<GuardedStringSugg>,
+}
+#[derive(Subdiagnostic)]
+#[suggestion(
+    parse_suggestion_whitespace,
+    code = " ",
+    applicability = "maybe-incorrect",
+    style = "verbose"
+)]
+pub(crate) struct GuardedStringSugg(#[primary_span] pub Span);
+
+#[derive(Diagnostic)]
 #[diag(parse_too_many_hashes)]
 pub(crate) struct TooManyHashes {
     #[primary_span]
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 3e46fc93fa4..d627ef3d2cb 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -10,7 +10,8 @@ use rustc_lexer::unescape::{self, EscapeError, Mode};
 use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
 use rustc_session::lint::BuiltinLintDiag;
 use rustc_session::lint::builtin::{
-    RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+    RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
+    TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::Symbol;
@@ -251,6 +252,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
                     let prefix_span = self.mk_sp(start, lit_start);
                     return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
                 }
+                rustc_lexer::TokenKind::GuardedStrPrefix => self.maybe_report_guarded_str(start, str_before),
                 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                     let suffix_start = start + BytePos(suffix_start);
                     let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -781,6 +783,86 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
         }
     }
 
+    /// Detect guarded string literal syntax
+    ///
+    /// RFC 3598 reserved this syntax for future use. As of Rust 2024,
+    /// using this syntax produces an error. In earlier editions, however, it
+    /// only results in an (allowed by default) lint, and is treated as
+    /// separate tokens.
+    fn maybe_report_guarded_str(&mut self, start: BytePos, str_before: &'src str) -> TokenKind {
+        let span = self.mk_sp(start, self.pos);
+        let edition2024 = span.edition().at_least_rust_2024();
+
+        let space_pos = start + BytePos(1);
+        let space_span = self.mk_sp(space_pos, space_pos);
+
+        let mut cursor = Cursor::new(str_before);
+
+        let (span, unterminated) = match cursor.guarded_double_quoted_string() {
+            Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
+                let end = start + BytePos(token_len);
+                let span = self.mk_sp(start, end);
+                let str_start = start + BytePos(n_hashes);
+
+                if edition2024 {
+                    self.cursor = cursor;
+                    self.pos = end;
+                }
+
+                let unterminated = if terminated { None } else { Some(str_start) };
+
+                (span, unterminated)
+            }
+            _ => {
+                // We should only get here in the `##+` case.
+                debug_assert_eq!(self.str_from_to(start, start + BytePos(2)), "##");
+
+                (span, None)
+            }
+        };
+        if edition2024 {
+            if let Some(str_start) = unterminated {
+                // Only a fatal error if string is unterminated.
+                self.dcx()
+                    .struct_span_fatal(
+                        self.mk_sp(str_start, self.pos),
+                        "unterminated double quote string",
+                    )
+                    .with_code(E0765)
+                    .emit()
+            }
+
+            let sugg = if span.from_expansion() {
+                None
+            } else {
+                Some(errors::GuardedStringSugg(space_span))
+            };
+
+            // In Edition 2024 and later, emit a hard error.
+            let err = self.dcx().emit_err(errors::ReservedString { span, sugg });
+
+            token::Literal(token::Lit {
+                kind: token::Err(err),
+                symbol: self.symbol_from_to(start, self.pos),
+                suffix: None,
+            })
+        } else {
+            // Before Rust 2024, only emit a lint for migration.
+            self.psess.buffer_lint(
+                RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
+                span,
+                ast::CRATE_NODE_ID,
+                BuiltinLintDiag::ReservedString(space_span),
+            );
+
+            // For backwards compatibility, roll back to after just the first `#`
+            // and return the `Pound` token.
+            self.pos = start + BytePos(1);
+            self.cursor = Cursor::new(&str_before[1..]);
+            token::Pound
+        }
+    }
+
     fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
         self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs
index 2d37f5bdab8..bae5f259fcc 100644
--- a/compiler/rustc_serialize/src/serialize.rs
+++ b/compiler/rustc_serialize/src/serialize.rs
@@ -325,7 +325,7 @@ impl<D: Decoder, const N: usize> Decodable<D> for [u8; N] {
     }
 }
 
-impl<'a, S: Encoder, T: Encodable<S>> Encodable<S> for Cow<'a, [T]>
+impl<S: Encoder, T: Encodable<S>> Encodable<S> for Cow<'_, [T]>
 where
     [T]: ToOwned<Owned = Vec<T>>,
 {
@@ -345,14 +345,14 @@ where
     }
 }
 
-impl<'a, S: Encoder> Encodable<S> for Cow<'a, str> {
+impl<S: Encoder> Encodable<S> for Cow<'_, str> {
     fn encode(&self, s: &mut S) {
         let val: &str = self;
         val.encode(s)
     }
 }
 
-impl<'a, D: Decoder> Decodable<D> for Cow<'a, str> {
+impl<D: Decoder> Decodable<D> for Cow<'_, str> {
     fn decode(d: &mut D) -> Cow<'static, str> {
         let v: String = Decodable::decode(d);
         Cow::Owned(v)