Initial implementation of RFC 2151, Raw Identifiers

author: Lymia Aluysia <lymia@lymiahugs.com> 2018-03-09 23:56:40 -0600
committer: Lymia Aluysia <lymia@lymiahugs.com> 2018-03-18 10:07:19 -0500
commit: fad1648e0f8299a8b108f85c2b1055eb37bdab9e (patch)
tree: 1a54c05782a65c39d2a01d3afe7a210e3dbe2c2f /src/libsyntax/parse/lexer
parent: 8aa27ee30972f16320ae4a8887c8f54616fff819 (diff)
download: rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.tar.gz
rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.zip
1 files changed, 46 insertions, 23 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 0e20eb49d39..0596fb44abe 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -14,7 +14,7 @@ use codemap::{CodeMap, FilePathMapping};
 use errors::{FatalError, DiagnosticBuilder};
 use parse::{token, ParseSess};
 use str::char_at;
-use symbol::Symbol;
+use symbol::{Symbol, keywords};
 use std_unicode::property::Pattern_White_Space;
 
 use std::borrow::Cow;
@@ -1115,26 +1115,49 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> Result<token::Token, ()> {
         let c = self.ch;
-        if ident_start(c) &&
-           match (c.unwrap(), self.nextch(), self.nextnextch()) {
-            // Note: r as in r" or r#" is part of a raw string literal,
-            // b as in b' is part of a byte literal.
-            // They are not identifiers, and are handled further down.
-            ('r', Some('"'), _) |
-            ('r', Some('#'), _) |
-            ('b', Some('"'), _) |
-            ('b', Some('\''), _) |
-            ('b', Some('r'), Some('"')) |
-            ('b', Some('r'), Some('#')) => false,
-            _ => true,
-        } {
-            let start = self.pos;
-            while ident_continue(self.ch) {
-                self.bump();
-            }
 
-            // FIXME: perform NFKC normalization here. (Issue #2253)
-            return Ok(self.with_str_from(start, |string| token::Ident(self.mk_ident(string))));
+        if ident_start(c) {
+            let (is_ident_start, is_raw_ident) =
+                match (c.unwrap(), self.nextch(), self.nextnextch()) {
+                    // r# followed by an identifier starter is a raw identifier.
+                    // This is an exception to the r# case below.
+                    ('r', Some('#'), x) if ident_start(x) => (true, true),
+                    // r as in r" or r#" is part of a raw string literal.
+                    // b as in b' is part of a byte literal.
+                    // They are not identifiers, and are handled further down.
+                    ('r', Some('"'), _) |
+                    ('r', Some('#'), _) |
+                    ('b', Some('"'), _) |
+                    ('b', Some('\''), _) |
+                    ('b', Some('r'), Some('"')) |
+                    ('b', Some('r'), Some('#')) => (false, false),
+                    _ => (true, false),
+                };
+            if is_ident_start {
+                let raw_start = self.pos;
+                if is_raw_ident {
+                    // Consume the 'r#' characters.
+                    self.bump();
+                    self.bump();
+                }
+
+                let start = self.pos;
+                while ident_continue(self.ch) {
+                    self.bump();
+                }
+
+                return Ok(self.with_str_from(start, |string| {
+                    // FIXME: perform NFKC normalization here. (Issue #2253)
+                    let ident = self.mk_ident(string);
+                    if is_raw_ident && (token::is_path_segment_keyword(ident) ||
+                                        ident.name == keywords::Underscore.name()) {
+                        self.fatal_span_(raw_start, self.pos,
+                            &format!("`r#{}` is not currently supported.", ident.name)
+                        ).raise();
+                    }
+                    token::Ident(ident, is_raw_ident)
+                }));
+            }
         }
 
         if is_dec_digit(c) {
@@ -1801,7 +1824,7 @@ mod tests {
             assert_eq!(string_reader.next_token().tok, token::Whitespace);
             let tok1 = string_reader.next_token();
             let tok2 = TokenAndSpan {
-                tok: token::Ident(id),
+                tok: token::Ident(id, false),
                 sp: Span::new(BytePos(21), BytePos(23), NO_EXPANSION),
             };
             assert_eq!(tok1, tok2);
@@ -1811,7 +1834,7 @@ mod tests {
             // read another token:
             let tok3 = string_reader.next_token();
             let tok4 = TokenAndSpan {
-                tok: token::Ident(Ident::from_str("main")),
+                tok: mk_ident("main"),
                 sp: Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
             };
             assert_eq!(tok3, tok4);
@@ -1830,7 +1853,7 @@ mod tests {
 
     // make the identifier by looking up the string in the interner
     fn mk_ident(id: &str) -> token::Token {
-        token::Ident(Ident::from_str(id))
+        token::Token::from_ast_ident(Ident::from_str(id))
     }
 
     #[test]
author	Lymia Aluysia <lymia@lymiahugs.com>	2018-03-09 23:56:40 -0600
committer	Lymia Aluysia <lymia@lymiahugs.com>	2018-03-18 10:07:19 -0500
commit	fad1648e0f8299a8b108f85c2b1055eb37bdab9e (patch)
tree	1a54c05782a65c39d2a01d3afe7a210e3dbe2c2f /src/libsyntax/parse/lexer
parent	8aa27ee30972f16320ae4a8887c8f54616fff819 (diff)
download	rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.tar.gz rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.zip