Reserve prefixed identifiers and string literals (RFC 3101)

This commit rejects any identifier immediately followed by one of the three tokens `"`, `'`, or `#`. This is stricter than RFC 3101 requires, but may be necessary according to the discussion on [Zulip].

[Zulip]: https://rust-lang.zulipchat.com/#narrow/stream/268952-edition-2021/topic/reserved.20prefixes/near/238470099
author: lrh2000 <lrh2000@pku.edu.cn> 2021-05-16 11:10:05 +0800
committer: lrh2000 <lrh2000@pku.edu.cn> 2021-06-26 23:09:43 +0800
commit: 8dee9bc8fcaf74776d067f34745bc4d7411d80f7 (patch)
tree: eab3a0117f3ae89f6f7248124821a922016f8048 /compiler
parent: 831ae3c1364b7b033bd1da430bc1cb86d93ad186 (diff)
download: rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.tar.gz
rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.zip
2 files changed, 43 insertions, 6 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index b9781581ff7..5f3e245bf99 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -66,6 +66,8 @@ pub enum TokenKind {
     Ident,
     /// "r#ident"
     RawIdent,
+    /// `foo#`, `foo'`, `foo"`. Note that the trailing character is not included.
+    BadPrefix,
     /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
     Literal { kind: LiteralKind, suffix_start: usize },
     /// "'a"
@@ -323,7 +325,7 @@ impl Cursor<'_> {
                     let kind = RawStr { n_hashes, err };
                     Literal { kind, suffix_start }
                 }
-                _ => self.ident(),
+                _ => self.ident_or_bad_prefix(),
             },
 
             // Byte literal, byte string literal, raw byte string literal or identifier.
@@ -358,12 +360,12 @@ impl Cursor<'_> {
                     let kind = RawByteStr { n_hashes, err };
                     Literal { kind, suffix_start }
                 }
-                _ => self.ident(),
+                _ => self.ident_or_bad_prefix(),
             },
 
             // Identifier (this should be checked after other variant that can
             // start as identifier).
-            c if is_id_start(c) => self.ident(),
+            c if is_id_start(c) => self.ident_or_bad_prefix(),
 
             // Numeric literal.
             c @ '0'..='9' => {
@@ -487,11 +489,16 @@ impl Cursor<'_> {
         RawIdent
     }
 
-    fn ident(&mut self) -> TokenKind {
+    fn ident_or_bad_prefix(&mut self) -> TokenKind {
         debug_assert!(is_id_start(self.prev()));
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(is_id_continue);
-        Ident
+        // Good prefixes must have been handled earlier. So if
+        // we see a prefix here, it is definitely a bad prefix.
+        match self.first() {
+            '#' | '"' | '\'' => BadPrefix,
+            _ => Ident,
+        }
     }
 
     fn number(&mut self, first_digit: char) -> LiteralKind {
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 1c2f9a9645f..5710c386e3b 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -5,6 +5,7 @@ use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PRe
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::{Base, DocStyle, RawStrError};
 use rustc_session::parse::ParseSess;
+use rustc_span::edition::Edition;
 use rustc_span::symbol::{sym, Symbol};
 use rustc_span::{BytePos, Pos, Span};
 
@@ -166,12 +167,18 @@ impl<'a> StringReader<'a> {
                 self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
             }
             rustc_lexer::TokenKind::Whitespace => return None,
-            rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
+            rustc_lexer::TokenKind::Ident
+            | rustc_lexer::TokenKind::RawIdent
+            | rustc_lexer::TokenKind::BadPrefix => {
                 let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
+                let is_bad_prefix = token == rustc_lexer::TokenKind::BadPrefix;
                 let mut ident_start = start;
                 if is_raw_ident {
                     ident_start = ident_start + BytePos(2);
                 }
+                if is_bad_prefix {
+                    self.report_reserved_prefix(start);
+                }
                 let sym = nfc_normalize(self.str_from(ident_start));
                 let span = self.mk_sp(start, self.pos);
                 self.sess.symbol_gallery.insert(sym, span);
@@ -491,6 +498,29 @@ impl<'a> StringReader<'a> {
         FatalError.raise()
     }
 
+    fn report_reserved_prefix(&self, start: BytePos) {
+        // See RFC 3101.
+        if self.sess.edition < Edition::Edition2021 {
+            return;
+        }
+
+        let mut err = self.sess.span_diagnostic.struct_span_err(
+            self.mk_sp(start, self.pos),
+            &format!("prefix `{}` is unknown", self.str_from_to(start, self.pos)),
+        );
+        err.span_label(self.mk_sp(start, self.pos), "unknown prefix");
+        err.span_label(
+            self.mk_sp(self.pos, self.pos),
+            &format!(
+                "help: consider inserting a whitespace before this `{}`",
+                self.str_from_to(self.pos, self.pos + BytePos(1)),
+            ),
+        );
+        err.note("prefixed identifiers and string literals are reserved since Rust 2021");
+
+        err.emit();
+    }
+
     /// Note: It was decided to not add a test case, because it would be too big.
     /// <https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180>
     fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {
author	lrh2000 <lrh2000@pku.edu.cn>	2021-05-16 11:10:05 +0800
committer	lrh2000 <lrh2000@pku.edu.cn>	2021-06-26 23:09:43 +0800
commit	8dee9bc8fcaf74776d067f34745bc4d7411d80f7 (patch)
tree	eab3a0117f3ae89f6f7248124821a922016f8048 /compiler
parent	831ae3c1364b7b033bd1da430bc1cb86d93ad186 (diff)
download	rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.tar.gz rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.zip