about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
author Lymia Aluysia <lymia@lymiahugs.com> 2018-03-09 23:56:40 -0600
committer Lymia Aluysia <lymia@lymiahugs.com> 2018-03-18 10:07:19 -0500
commit fad1648e0f8299a8b108f85c2b1055eb37bdab9e (patch)
tree 1a54c05782a65c39d2a01d3afe7a210e3dbe2c2f /src/libsyntax/parse
parent 8aa27ee30972f16320ae4a8887c8f54616fff819 (diff)
download rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.tar.gz
rust-fad1648e0f8299a8b108f85c2b1055eb37bdab9e.zip
Initial implementation of RFC 2151, Raw Identifiers
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 69
-rw-r--r-- src/libsyntax/parse/mod.rs 20
-rw-r--r-- src/libsyntax/parse/parser.rs 29
-rw-r--r-- src/libsyntax/parse/token.rs 108
4 files changed, 153 insertions, 73 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 0e20eb49d39..0596fb44abe 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -14,7 +14,7 @@ use codemap::{CodeMap, FilePathMapping};
 use errors::{FatalError, DiagnosticBuilder};
 use parse::{token, ParseSess};
 use str::char_at;
-use symbol::Symbol;
+use symbol::{Symbol, keywords};
 use std_unicode::property::Pattern_White_Space;
 
 use std::borrow::Cow;
@@ -1115,26 +1115,49 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> Result<token::Token, ()> {
         let c = self.ch;
-        if ident_start(c) &&
-           match (c.unwrap(), self.nextch(), self.nextnextch()) {
-            // Note: r as in r" or r#" is part of a raw string literal,
-            // b as in b' is part of a byte literal.
-            // They are not identifiers, and are handled further down.
-            ('r', Some('"'), _) |
-            ('r', Some('#'), _) |
-            ('b', Some('"'), _) |
-            ('b', Some('\''), _) |
-            ('b', Some('r'), Some('"')) |
-            ('b', Some('r'), Some('#')) => false,
-            _ => true,
-        } {
-            let start = self.pos;
-            while ident_continue(self.ch) {
-                self.bump();
-            }
 
-            // FIXME: perform NFKC normalization here. (Issue #2253)
-            return Ok(self.with_str_from(start, |string| token::Ident(self.mk_ident(string))));
+        if ident_start(c) {
+            let (is_ident_start, is_raw_ident) =
+                match (c.unwrap(), self.nextch(), self.nextnextch()) {
+                    // r# followed by an identifier starter is a raw identifier.
+                    // This is an exception to the r# case below.
+                    ('r', Some('#'), x) if ident_start(x) => (true, true),
+                    // r as in r" or r#" is part of a raw string literal.
+                    // b as in b' is part of a byte literal.
+                    // They are not identifiers, and are handled further down.
+                    ('r', Some('"'), _) |
+                    ('r', Some('#'), _) |
+                    ('b', Some('"'), _) |
+                    ('b', Some('\''), _) |
+                    ('b', Some('r'), Some('"')) |
+                    ('b', Some('r'), Some('#')) => (false, false),
+                    _ => (true, false),
+                };
+            if is_ident_start {
+                let raw_start = self.pos;
+                if is_raw_ident {
+                    // Consume the 'r#' characters.
+                    self.bump();
+                    self.bump();
+                }
+
+                let start = self.pos;
+                while ident_continue(self.ch) {
+                    self.bump();
+                }
+
+                return Ok(self.with_str_from(start, |string| {
+                    // FIXME: perform NFKC normalization here. (Issue #2253)
+                    let ident = self.mk_ident(string);
+                    if is_raw_ident && (token::is_path_segment_keyword(ident) ||
+                                        ident.name == keywords::Underscore.name()) {
+                        self.fatal_span_(raw_start, self.pos,
+                            &format!("`r#{}` is not currently supported.", ident.name)
+                        ).raise();
+                    }
+                    token::Ident(ident, is_raw_ident)
+                }));
+            }
         }
 
         if is_dec_digit(c) {
@@ -1801,7 +1824,7 @@ mod tests {
             assert_eq!(string_reader.next_token().tok, token::Whitespace);
             let tok1 = string_reader.next_token();
             let tok2 = TokenAndSpan {
-                tok: token::Ident(id),
+                tok: token::Ident(id, false),
                 sp: Span::new(BytePos(21), BytePos(23), NO_EXPANSION),
             };
             assert_eq!(tok1, tok2);
@@ -1811,7 +1834,7 @@ mod tests {
             // read another token:
             let tok3 = string_reader.next_token();
             let tok4 = TokenAndSpan {
-                tok: token::Ident(Ident::from_str("main")),
+                tok: mk_ident("main"),
                 sp: Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
             };
             assert_eq!(tok3, tok4);
@@ -1830,7 +1853,7 @@ mod tests {
 
     // make the identifier by looking up the string in the interner
     fn mk_ident(id: &str) -> token::Token {
-        token::Ident(Ident::from_str(id))
+        token::Token::from_ast_ident(Ident::from_str(id))
     }
 
     #[test]
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index f7e5d40b524..4acfdab53c0 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -741,9 +741,9 @@ mod tests {
             match (tts.len(), tts.get(0), tts.get(1), tts.get(2), tts.get(3)) {
                 (
                     4,
-                    Some(&TokenTree::Token(_, token::Ident(name_macro_rules))),
+                    Some(&TokenTree::Token(_, token::Ident(name_macro_rules, false))),
                     Some(&TokenTree::Token(_, token::Not)),
-                    Some(&TokenTree::Token(_, token::Ident(name_zip))),
+                    Some(&TokenTree::Token(_, token::Ident(name_zip, false))),
                     Some(&TokenTree::Delimited(_, ref macro_delimed)),
                 )
                 if name_macro_rules.name == "macro_rules"
@@ -762,7 +762,7 @@ mod tests {
                                 (
                                     2,
                                     Some(&TokenTree::Token(_, token::Dollar)),
-                                    Some(&TokenTree::Token(_, token::Ident(ident))),
+                                    Some(&TokenTree::Token(_, token::Ident(ident, false))),
                                 )
                                 if first_delimed.delim == token::Paren && ident.name == "a" => {},
                                 _ => panic!("value 3: {:?}", *first_delimed),
@@ -772,7 +772,7 @@ mod tests {
                                 (
                                     2,
                                     Some(&TokenTree::Token(_, token::Dollar)),
-                                    Some(&TokenTree::Token(_, token::Ident(ident))),
+                                    Some(&TokenTree::Token(_, token::Ident(ident, false))),
                                 )
                                 if second_delimed.delim == token::Paren
                                 && ident.name == "a" => {},
@@ -793,17 +793,18 @@ mod tests {
             let tts = string_to_stream("fn a (b : i32) { b; }".to_string());
 
             let expected = TokenStream::concat(vec![
-                TokenTree::Token(sp(0, 2), token::Ident(Ident::from_str("fn"))).into(),
-                TokenTree::Token(sp(3, 4), token::Ident(Ident::from_str("a"))).into(),
+                TokenTree::Token(sp(0, 2), token::Ident(Ident::from_str("fn"), false)).into(),
+                TokenTree::Token(sp(3, 4), token::Ident(Ident::from_str("a"), false)).into(),
                 TokenTree::Delimited(
                     sp(5, 14),
                     tokenstream::Delimited {
                         delim: token::DelimToken::Paren,
                         tts: TokenStream::concat(vec![
-                            TokenTree::Token(sp(6, 7), token::Ident(Ident::from_str("b"))).into(),
+                            TokenTree::Token(sp(6, 7),
+                                             token::Ident(Ident::from_str("b"), false)).into(),
                             TokenTree::Token(sp(8, 9), token::Colon).into(),
                             TokenTree::Token(sp(10, 13),
-                                             token::Ident(Ident::from_str("i32"))).into(),
+                                             token::Ident(Ident::from_str("i32"), false)).into(),
                         ]).into(),
                     }).into(),
                 TokenTree::Delimited(
@@ -811,7 +812,8 @@ mod tests {
                     tokenstream::Delimited {
                         delim: token::DelimToken::Brace,
                         tts: TokenStream::concat(vec![
-                            TokenTree::Token(sp(17, 18), token::Ident(Ident::from_str("b"))).into(),
+                            TokenTree::Token(sp(17, 18),
+                                             token::Ident(Ident::from_str("b"), false)).into(),
                             TokenTree::Token(sp(18, 19), token::Semi).into(),
                         ]).into(),
                     }).into()
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index cb5010a638d..4c1575cf589 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -358,7 +358,7 @@ impl TokenCursor {
 
         let body = TokenTree::Delimited(sp, Delimited {
             delim: token::Bracket,
-            tts: [TokenTree::Token(sp, token::Ident(ast::Ident::from_str("doc"))),
+            tts: [TokenTree::Token(sp, token::Ident(ast::Ident::from_str("doc"), false)),
                   TokenTree::Token(sp, token::Eq),
                   TokenTree::Token(sp, token::Literal(
                       token::StrRaw(Symbol::intern(&stripped), num_of_hashes), None))]
@@ -784,7 +784,7 @@ impl<'a> Parser<'a> {
 
     fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> {
         match self.token {
-            token::Ident(i) => {
+            token::Ident(i, _) => {
                 if self.token.is_reserved_ident() {
                     let mut err = self.expected_ident_found();
                     if recover {
@@ -1925,7 +1925,7 @@ impl<'a> Parser<'a> {
 
     pub fn parse_path_segment_ident(&mut self) -> PResult<'a, ast::Ident> {
         match self.token {
-            token::Ident(sid) if self.token.is_path_segment_keyword() => {
+            token::Ident(sid, _) if self.token.is_path_segment_keyword() => {
                 self.bump();
                 Ok(sid)
             }
@@ -2740,11 +2740,14 @@ impl<'a> Parser<'a> {
     }
 
     pub fn process_potential_macro_variable(&mut self) {
-        let ident = match self.token {
+        let (ident, is_raw) = match self.token {
             token::Dollar if self.span.ctxt() != syntax_pos::hygiene::SyntaxContext::empty() &&
                              self.look_ahead(1, |t| t.is_ident()) => {
                 self.bump();
-                let name = match self.token { token::Ident(ident) => ident, _ => unreachable!() };
+                let name = match self.token {
+                    token::Ident(ident, _) => ident,
+                    _ => unreachable!()
+                };
                 let mut err = self.fatal(&format!("unknown macro variable `{}`", name));
                 err.span_label(self.span, "unknown macro variable");
                 err.emit();
@@ -2753,13 +2756,13 @@ impl<'a> Parser<'a> {
             token::Interpolated(ref nt) => {
                 self.meta_var_span = Some(self.span);
                 match nt.0 {
-                    token::NtIdent(ident) => ident,
+                    token::NtIdent(ident, is_raw) => (ident, is_raw),
                     _ => return,
                 }
             }
             _ => return,
         };
-        self.token = token::Ident(ident.node);
+        self.token = token::Ident(ident.node, is_raw);
         self.span = ident.span;
     }
 
@@ -4245,7 +4248,7 @@ impl<'a> Parser<'a> {
                      -> PResult<'a, Option<P<Item>>> {
         let token_lo = self.span;
         let (ident, def) = match self.token {
-            token::Ident(ident) if ident.name == keywords::Macro.name() => {
+            token::Ident(ident, false) if ident.name == keywords::Macro.name() => {
                 self.bump();
                 let ident = self.parse_ident()?;
                 let tokens = if self.check(&token::OpenDelim(token::Brace)) {
@@ -4273,7 +4276,7 @@ impl<'a> Parser<'a> {
 
                 (ident, ast::MacroDef { tokens: tokens.into(), legacy: false })
             }
-            token::Ident(ident) if ident.name == "macro_rules" &&
+            token::Ident(ident, _) if ident.name == "macro_rules" &&
                                    self.look_ahead(1, |t| *t == token::Not) => {
                 let prev_span = self.prev_span;
                 self.complain_if_pub_macro(&vis.node, prev_span);
@@ -5078,7 +5081,9 @@ impl<'a> Parser<'a> {
     fn parse_self_arg(&mut self) -> PResult<'a, Option<Arg>> {
         let expect_ident = |this: &mut Self| match this.token {
             // Preserve hygienic context.
-            token::Ident(ident) => { let sp = this.span; this.bump(); codemap::respan(sp, ident) }
+            token::Ident(ident, _) => {
+                let sp = this.span; this.bump(); codemap::respan(sp, ident)
+            }
             _ => unreachable!()
         };
         let isolated_self = |this: &mut Self, n| {
@@ -5375,7 +5380,7 @@ impl<'a> Parser<'a> {
             VisibilityKind::Inherited => Ok(()),
             _ => {
                 let is_macro_rules: bool = match self.token {
-                    token::Ident(sid) => sid.name == Symbol::intern("macro_rules"),
+                    token::Ident(sid, _) => sid.name == Symbol::intern("macro_rules"),
                     _ => false,
                 };
                 if is_macro_rules {
@@ -7016,7 +7021,7 @@ impl<'a> Parser<'a> {
     fn parse_rename(&mut self) -> PResult<'a, Option<Ident>> {
         if self.eat_keyword(keywords::As) {
             match self.token {
-                token::Ident(ident) if ident.name == keywords::Underscore.name() => {
+                token::Ident(ident, false) if ident.name == keywords::Underscore.name() => {
                     self.bump(); // `_`
                     Ok(Some(Ident { name: ident.name.gensymed(), ..ident }))
                 }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 4ada9e20f2c..6406651bcba 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -91,8 +91,8 @@ impl Lit {
     }
 }
 
-fn ident_can_begin_expr(ident: ast::Ident) -> bool {
-    let ident_token: Token = Ident(ident);
+fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
+    let ident_token: Token = Ident(ident, is_raw);
 
     !ident_token.is_reserved_ident() ||
     ident_token.is_path_segment_keyword() ||
@@ -116,8 +116,8 @@ fn ident_can_begin_expr(ident: ast::Ident) -> bool {
     ].contains(&ident.name)
 }
 
-fn ident_can_begin_type(ident: ast::Ident) -> bool {
-    let ident_token: Token = Ident(ident);
+fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
+    let ident_token: Token = Ident(ident, is_raw);
 
     !ident_token.is_reserved_ident() ||
     ident_token.is_path_segment_keyword() ||
@@ -132,6 +132,37 @@ fn ident_can_begin_type(ident: ast::Ident) -> bool {
     ].contains(&ident.name)
 }
 
+pub fn is_path_segment_keyword(id: ast::Ident) -> bool {
+    id.name == keywords::Super.name() ||
+    id.name == keywords::SelfValue.name() ||
+    id.name == keywords::SelfType.name() ||
+    id.name == keywords::Extern.name() ||
+    id.name == keywords::Crate.name() ||
+    id.name == keywords::CrateRoot.name() ||
+    id.name == keywords::DollarCrate.name()
+}
+
+// Returns true for reserved identifiers used internally for elided lifetimes,
+// unnamed method parameters, crate root module, error recovery etc.
+pub fn is_special_ident(id: ast::Ident) -> bool {
+    id.name <= keywords::Underscore.name()
+}
+
+/// Returns `true` if the token is a keyword used in the language.
+pub fn is_used_keyword(id: ast::Ident) -> bool {
+    id.name >= keywords::As.name() && id.name <= keywords::While.name()
+}
+
+/// Returns `true` if the token is a keyword reserved for possible future use.
+pub fn is_unused_keyword(id: ast::Ident) -> bool {
+    id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name()
+}
+
+/// Returns `true` if the token is either a special identifier or a keyword.
+pub fn is_reserved_ident(id: ast::Ident) -> bool {
+    is_special_ident(id) || is_used_keyword(id) || is_unused_keyword(id)
+}
+
 #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug)]
 pub enum Token {
     /* Expression-operator symbols. */
@@ -175,7 +206,7 @@ pub enum Token {
     Literal(Lit, Option<ast::Name>),
 
     /* Name components */
-    Ident(ast::Ident),
+    Ident(ast::Ident, /* is_raw */ bool),
     Lifetime(ast::Ident),
 
     // The `LazyTokenStream` is a pure function of the `Nonterminal`,
@@ -203,6 +234,11 @@ impl Token {
         Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
     }
 
+    /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
+    pub fn from_ast_ident(ident: ast::Ident) -> Token {
+        Ident(ident, is_reserved_ident(ident))
+    }
+
     /// Returns `true` if the token starts with '>'.
     pub fn is_like_gt(&self) -> bool {
         match *self {
@@ -214,7 +250,8 @@ impl Token {
     /// Returns `true` if the token can appear at the start of an expression.
     pub fn can_begin_expr(&self) -> bool {
         match *self {
-            Ident(ident)                => ident_can_begin_expr(ident), // value name or keyword
+            Ident(ident, is_raw)              =>
+                ident_can_begin_expr(ident, is_raw), // value name or keyword
             OpenDelim(..)                     | // tuple, array or block
             Literal(..)                       | // literal
             Not                               | // operator not
@@ -239,7 +276,8 @@ impl Token {
     /// Returns `true` if the token can appear at the start of a type.
     pub fn can_begin_type(&self) -> bool {
         match *self {
-            Ident(ident)                => ident_can_begin_type(ident), // type name or keyword
+            Ident(ident, is_raw)        =>
+                ident_can_begin_type(ident, is_raw), // type name or keyword
             OpenDelim(Paren)            | // tuple
             OpenDelim(Bracket)          | // array
             Not                         | // never
@@ -272,17 +310,32 @@ impl Token {
         }
     }
 
-    pub fn ident(&self) -> Option<ast::Ident> {
+    fn ident_common(&self, allow_raw: bool) -> Option<ast::Ident> {
         match *self {
-            Ident(ident) => Some(ident),
+            Ident(ident, is_raw) if !is_raw || allow_raw => Some(ident),
             Interpolated(ref nt) => match nt.0 {
-                NtIdent(ident) => Some(ident.node),
+                NtIdent(ident, is_raw) if !is_raw || allow_raw => Some(ident.node),
                 _ => None,
             },
             _ => None,
         }
     }
 
+    pub fn nonraw_ident(&self) -> Option<ast::Ident> {
+        self.ident_common(false)
+    }
+
+    pub fn is_raw_ident(&self) -> bool {
+        match *self {
+            Ident(_, true) => true,
+            _ => false,
+        }
+    }
+
+    pub fn ident(&self) -> Option<ast::Ident> {
+        self.ident_common(true)
+    }
+
     /// Returns `true` if the token is an identifier.
     pub fn is_ident(&self) -> bool {
         self.ident().is_some()
@@ -351,18 +404,12 @@ impl Token {
 
     /// Returns `true` if the token is a given keyword, `kw`.
     pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
-        self.ident().map(|ident| ident.name == kw.name()).unwrap_or(false)
+        self.nonraw_ident().map(|ident| ident.name == kw.name()).unwrap_or(false)
     }
 
     pub fn is_path_segment_keyword(&self) -> bool {
-        match self.ident() {
-            Some(id) => id.name == keywords::Super.name() ||
-                        id.name == keywords::SelfValue.name() ||
-                        id.name == keywords::SelfType.name() ||
-                        id.name == keywords::Extern.name() ||
-                        id.name == keywords::Crate.name() ||
-                        id.name == keywords::CrateRoot.name() ||
-                        id.name == keywords::DollarCrate.name(),
+        match self.nonraw_ident() {
+            Some(id) => is_path_segment_keyword(id),
             None => false,
         }
     }
@@ -370,24 +417,24 @@ impl Token {
     // Returns true for reserved identifiers used internally for elided lifetimes,
     // unnamed method parameters, crate root module, error recovery etc.
     pub fn is_special_ident(&self) -> bool {
-        match self.ident() {
-            Some(id) => id.name <= keywords::Underscore.name(),
+        match self.nonraw_ident() {
+            Some(id) => is_special_ident(id),
             _ => false,
         }
     }
 
     /// Returns `true` if the token is a keyword used in the language.
     pub fn is_used_keyword(&self) -> bool {
-        match self.ident() {
-            Some(id) => id.name >= keywords::As.name() && id.name <= keywords::While.name(),
+        match self.nonraw_ident() {
+            Some(id) => is_used_keyword(id),
             _ => false,
         }
     }
 
     /// Returns `true` if the token is a keyword reserved for possible future use.
     pub fn is_unused_keyword(&self) -> bool {
-        match self.ident() {
-            Some(id) => id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name(),
+        match self.nonraw_ident() {
+            Some(id) => is_unused_keyword(id),
             _ => false,
         }
     }
@@ -460,7 +507,10 @@ impl Token {
 
     /// Returns `true` if the token is either a special identifier or a keyword.
     pub fn is_reserved_ident(&self) -> bool {
-        self.is_special_ident() || self.is_used_keyword() || self.is_unused_keyword()
+        match self.nonraw_ident() {
+            Some(id) => is_reserved_ident(id),
+            _ => false,
+        }
     }
 
     pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
@@ -496,8 +546,8 @@ impl Token {
             Nonterminal::NtImplItem(ref item) => {
                 tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
             }
-            Nonterminal::NtIdent(ident) => {
-                let token = Token::Ident(ident.node);
+            Nonterminal::NtIdent(ident, is_raw) => {
+                let token = Token::Ident(ident.node, is_raw);
                 tokens = Some(TokenTree::Token(ident.span, token).into());
             }
             Nonterminal::NtLifetime(lifetime) => {
@@ -529,7 +579,7 @@ pub enum Nonterminal {
     NtPat(P<ast::Pat>),
     NtExpr(P<ast::Expr>),
     NtTy(P<ast::Ty>),
-    NtIdent(ast::SpannedIdent),
+    NtIdent(ast::SpannedIdent, /* is_raw */ bool),
     /// Stuff inside brackets for attributes
     NtMeta(ast::MetaItem),
     NtPath(ast::Path),