diff options
| author | lrh2000 <lrh2000@pku.edu.cn> | 2021-05-16 11:10:05 +0800 |
|---|---|---|
| committer | lrh2000 <lrh2000@pku.edu.cn> | 2021-06-26 23:09:43 +0800 |
| commit | 8dee9bc8fcaf74776d067f34745bc4d7411d80f7 (patch) | |
| tree | eab3a0117f3ae89f6f7248124821a922016f8048 /compiler | |
| parent | 831ae3c1364b7b033bd1da430bc1cb86d93ad186 (diff) | |
| download | rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.tar.gz rust-8dee9bc8fcaf74776d067f34745bc4d7411d80f7.zip | |
Reserve prefixed identifiers and string literals (RFC 3101)
This commit denies any identifier immediately followed by one of the three tokens `"`, `'` or `#`. This is stricter than what RFC 3101 requires, but may be necessary according to the discussion on Zulip: https://rust-lang.zulipchat.com/#narrow/stream/268952-edition-2021/topic/reserved.20prefixes/near/238470099
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 17 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 32 |
2 files changed, 43 insertions, 6 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b9781581ff7..5f3e245bf99 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -66,6 +66,8 @@ pub enum TokenKind { Ident, /// "r#ident" RawIdent, + /// `foo#`, `foo'`, `foo"`. Note the trailing character is not included. + BadPrefix, /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details. Literal { kind: LiteralKind, suffix_start: usize }, /// "'a" @@ -323,7 +325,7 @@ impl Cursor<'_> { let kind = RawStr { n_hashes, err }; Literal { kind, suffix_start } } - _ => self.ident(), + _ => self.ident_or_bad_prefix(), }, // Byte literal, byte string literal, raw byte string literal or identifier. @@ -358,12 +360,12 @@ impl Cursor<'_> { let kind = RawByteStr { n_hashes, err }; Literal { kind, suffix_start } } - _ => self.ident(), + _ => self.ident_or_bad_prefix(), }, // Identifier (this should be checked after other variant that can // start as identifier). - c if is_id_start(c) => self.ident(), + c if is_id_start(c) => self.ident_or_bad_prefix(), // Numeric literal. c @ '0'..='9' => { @@ -487,11 +489,16 @@ impl Cursor<'_> { RawIdent } - fn ident(&mut self) -> TokenKind { + fn ident_or_bad_prefix(&mut self) -> TokenKind { debug_assert!(is_id_start(self.prev())); // Start is already eaten, eat the rest of identifier. self.eat_while(is_id_continue); - Ident + // Good prefixes must have been handled earlier. So if + // we see a prefix here, it is definitely a bad prefix. 
+ match self.first() { + '#' | '"' | '\'' => BadPrefix, + _ => Ident, + } } fn number(&mut self, first_digit: char) -> LiteralKind { diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1c2f9a9645f..5710c386e3b 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -5,6 +5,7 @@ use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PRe use rustc_lexer::unescape::{self, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; use rustc_session::parse::ParseSess; +use rustc_span::edition::Edition; use rustc_span::symbol::{sym, Symbol}; use rustc_span::{BytePos, Pos, Span}; @@ -166,12 +167,18 @@ impl<'a> StringReader<'a> { self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } rustc_lexer::TokenKind::Whitespace => return None, - rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => { + rustc_lexer::TokenKind::Ident + | rustc_lexer::TokenKind::RawIdent + | rustc_lexer::TokenKind::BadPrefix => { let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; + let is_bad_prefix = token == rustc_lexer::TokenKind::BadPrefix; let mut ident_start = start; if is_raw_ident { ident_start = ident_start + BytePos(2); } + if is_bad_prefix { + self.report_reserved_prefix(start); + } let sym = nfc_normalize(self.str_from(ident_start)); let span = self.mk_sp(start, self.pos); self.sess.symbol_gallery.insert(sym, span); @@ -491,6 +498,29 @@ impl<'a> StringReader<'a> { FatalError.raise() } + fn report_reserved_prefix(&self, start: BytePos) { + // See RFC 3101. 
+ if self.sess.edition < Edition::Edition2021 { + return; + } + + let mut err = self.sess.span_diagnostic.struct_span_err( + self.mk_sp(start, self.pos), + &format!("prefix `{}` is unknown", self.str_from_to(start, self.pos)), + ); + err.span_label(self.mk_sp(start, self.pos), "unknown prefix"); + err.span_label( + self.mk_sp(self.pos, self.pos), + &format!( + "help: consider inserting a whitespace before this `{}`", + self.str_from_to(self.pos, self.pos + BytePos(1)), + ), + ); + err.note("prefixed identifiers and string literals are reserved since Rust 2021"); + + err.emit(); + } + /// Note: It was decided to not add a test case, because it would be too big. /// <https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180> fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! { |
