diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2018-03-23 07:31:15 -0700 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2018-03-23 10:16:40 -0700 |
| commit | 82bb41bdab6d6d5611573883bea5779ed43ca184 (patch) | |
| tree | 9e9aae42ae9d4bfbf97cc7fa263a33064e471c06 /src/libsyntax/parse | |
| parent | d58abe75bbb00f6cf3287a5d490afd518962ae54 (diff) | |
| parent | 57f9c4d6d9ba7d48b9f64193dd037a54e11ef7b4 (diff) | |
| download | rust-82bb41bdab6d6d5611573883bea5779ed43ca184.tar.gz rust-82bb41bdab6d6d5611573883bea5779ed43ca184.zip | |
Merge branch 'master' of https://github.com/Lymia/rust into rollup
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 74 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 24 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 29 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 87 |
4 files changed, 144 insertions, 70 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 0e20eb49d39..068929c8948 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -14,7 +14,7 @@ use codemap::{CodeMap, FilePathMapping}; use errors::{FatalError, DiagnosticBuilder}; use parse::{token, ParseSess}; use str::char_at; -use symbol::Symbol; +use symbol::{Symbol, keywords}; use std_unicode::property::Pattern_White_Space; use std::borrow::Cow; @@ -1115,26 +1115,53 @@ impl<'a> StringReader<'a> { /// token, and updates the interner fn next_token_inner(&mut self) -> Result<token::Token, ()> { let c = self.ch; - if ident_start(c) && - match (c.unwrap(), self.nextch(), self.nextnextch()) { - // Note: r as in r" or r#" is part of a raw string literal, - // b as in b' is part of a byte literal. - // They are not identifiers, and are handled further down. - ('r', Some('"'), _) | - ('r', Some('#'), _) | - ('b', Some('"'), _) | - ('b', Some('\''), _) | - ('b', Some('r'), Some('"')) | - ('b', Some('r'), Some('#')) => false, - _ => true, - } { - let start = self.pos; - while ident_continue(self.ch) { - self.bump(); - } - // FIXME: perform NFKC normalization here. (Issue #2253) - return Ok(self.with_str_from(start, |string| token::Ident(self.mk_ident(string)))); + if ident_start(c) { + let (is_ident_start, is_raw_ident) = + match (c.unwrap(), self.nextch(), self.nextnextch()) { + // r# followed by an identifier starter is a raw identifier. + // This is an exception to the r# case below. + ('r', Some('#'), x) if ident_start(x) => (true, true), + // r as in r" or r#" is part of a raw string literal. + // b as in b' is part of a byte literal. + // They are not identifiers, and are handled further down. + ('r', Some('"'), _) | + ('r', Some('#'), _) | + ('b', Some('"'), _) | + ('b', Some('\''), _) | + ('b', Some('r'), Some('"')) | + ('b', Some('r'), Some('#')) => (false, false), + _ => (true, false), + }; + if is_ident_start { + let raw_start = self.pos; + if is_raw_ident { + // Consume the 'r#' characters. + self.bump(); + self.bump(); + } + + let start = self.pos; + while ident_continue(self.ch) { + self.bump(); + } + + return Ok(self.with_str_from(start, |string| { + // FIXME: perform NFKC normalization here. (Issue #2253) + let ident = self.mk_ident(string); + if is_raw_ident && (token::is_path_segment_keyword(ident) || + ident.name == keywords::Underscore.name()) { + self.fatal_span_(raw_start, self.pos, + &format!("`r#{}` is not currently supported.", ident.name) + ).raise(); + } + if is_raw_ident { + let span = self.mk_sp(raw_start, self.pos); + self.sess.raw_identifier_spans.borrow_mut().push(span); + } + token::Ident(ident, is_raw_ident) + })); + } } if is_dec_digit(c) { @@ -1773,6 +1800,7 @@ mod tests { included_mod_stack: RefCell::new(Vec::new()), code_map: cm, missing_fragment_specifiers: RefCell::new(HashSet::new()), + raw_identifier_spans: RefCell::new(Vec::new()), registered_diagnostics: Lock::new(ErrorMap::new()), non_modrs_mods: RefCell::new(vec![]), } @@ -1801,7 +1829,7 @@ mod tests { assert_eq!(string_reader.next_token().tok, token::Whitespace); let tok1 = string_reader.next_token(); let tok2 = TokenAndSpan { - tok: token::Ident(id), + tok: token::Ident(id, false), sp: Span::new(BytePos(21), BytePos(23), NO_EXPANSION), }; assert_eq!(tok1, tok2); @@ -1811,7 +1839,7 @@ mod tests { // read another token: let tok3 = string_reader.next_token(); let tok4 = TokenAndSpan { - tok: token::Ident(Ident::from_str("main")), + tok: mk_ident("main"), sp: Span::new(BytePos(24), BytePos(28), NO_EXPANSION), }; assert_eq!(tok3, tok4); @@ -1830,7 +1858,7 @@ mod tests { // make the identifier by looking up the string in the interner fn mk_ident(id: &str) -> token::Token { - token::Ident(Ident::from_str(id)) + token::Token::from_ast_ident(Ident::from_str(id)) } #[test] diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 202dc03eaa4..7b39db16ac2 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -48,6 +48,9 @@ pub struct ParseSess { pub unstable_features: UnstableFeatures, pub config: CrateConfig, pub missing_fragment_specifiers: RefCell<HashSet<Span>>, + /// Places where raw identifiers were used. This is used for feature gating + /// raw identifiers + pub raw_identifier_spans: RefCell<Vec<Span>>, /// The registered diagnostics codes pub registered_diagnostics: Lock<ErrorMap>, // Spans where a `mod foo;` statement was included in a non-mod.rs file. @@ -74,6 +77,7 @@ impl ParseSess { unstable_features: UnstableFeatures::from_environment(), config: HashSet::new(), missing_fragment_specifiers: RefCell::new(HashSet::new()), + raw_identifier_spans: RefCell::new(Vec::new()), registered_diagnostics: Lock::new(ErrorMap::new()), included_mod_stack: RefCell::new(vec![]), code_map, @@ -739,9 +743,9 @@ mod tests { match (tts.len(), tts.get(0), tts.get(1), tts.get(2), tts.get(3)) { ( 4, - Some(&TokenTree::Token(_, token::Ident(name_macro_rules))), + Some(&TokenTree::Token(_, token::Ident(name_macro_rules, false))), Some(&TokenTree::Token(_, token::Not)), - Some(&TokenTree::Token(_, token::Ident(name_zip))), + Some(&TokenTree::Token(_, token::Ident(name_zip, false))), Some(&TokenTree::Delimited(_, ref macro_delimed)), ) if name_macro_rules.name == "macro_rules" @@ -760,7 +764,7 @@ mod tests { ( 2, Some(&TokenTree::Token(_, token::Dollar)), - Some(&TokenTree::Token(_, token::Ident(ident))), + Some(&TokenTree::Token(_, token::Ident(ident, false))), ) if first_delimed.delim == token::Paren && ident.name == "a" => {}, _ => panic!("value 3: {:?}", *first_delimed), @@ -770,7 +774,7 @@ mod tests { ( 2, Some(&TokenTree::Token(_, token::Dollar)), - Some(&TokenTree::Token(_, token::Ident(ident))), + Some(&TokenTree::Token(_, token::Ident(ident, false))), ) if second_delimed.delim == token::Paren && ident.name == "a" => {}, @@ -791,17 +795,18 @@ mod tests { let tts = string_to_stream("fn a (b : i32) { b; }".to_string()); let expected = TokenStream::concat(vec![ - TokenTree::Token(sp(0, 2), token::Ident(Ident::from_str("fn"))).into(), - TokenTree::Token(sp(3, 4), token::Ident(Ident::from_str("a"))).into(), + TokenTree::Token(sp(0, 2), token::Ident(Ident::from_str("fn"), false)).into(), + TokenTree::Token(sp(3, 4), token::Ident(Ident::from_str("a"), false)).into(), TokenTree::Delimited( sp(5, 14), tokenstream::Delimited { delim: token::DelimToken::Paren, tts: TokenStream::concat(vec![ - TokenTree::Token(sp(6, 7), token::Ident(Ident::from_str("b"))).into(), + TokenTree::Token(sp(6, 7), + token::Ident(Ident::from_str("b"), false)).into(), TokenTree::Token(sp(8, 9), token::Colon).into(), TokenTree::Token(sp(10, 13), - token::Ident(Ident::from_str("i32"))).into(), + token::Ident(Ident::from_str("i32"), false)).into(), ]).into(), }).into(), TokenTree::Delimited( @@ -809,7 +814,8 @@ mod tests { tokenstream::Delimited { delim: token::DelimToken::Brace, tts: TokenStream::concat(vec![ - TokenTree::Token(sp(17, 18), token::Ident(Ident::from_str("b"))).into(), + TokenTree::Token(sp(17, 18), + token::Ident(Ident::from_str("b"), false)).into(), TokenTree::Token(sp(18, 19), token::Semi).into(), ]).into(), }).into() diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 6d8975197d5..a07279acae2 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -358,7 +358,7 @@ impl TokenCursor { let body = TokenTree::Delimited(sp, Delimited { delim: token::Bracket, - tts: [TokenTree::Token(sp, token::Ident(ast::Ident::from_str("doc"))), + tts: [TokenTree::Token(sp, token::Ident(ast::Ident::from_str("doc"), false)), TokenTree::Token(sp, token::Eq), TokenTree::Token(sp, token::Literal( token::StrRaw(Symbol::intern(&stripped), num_of_hashes), None))] @@ -784,7 +784,7 @@ impl<'a> Parser<'a> { fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> { match self.token { - token::Ident(i) => { + token::Ident(i, _) => { if self.token.is_reserved_ident() { let mut err = self.expected_ident_found(); if recover { @@ -1925,7 +1925,7 @@ impl<'a> Parser<'a> { pub fn parse_path_segment_ident(&mut self) -> PResult<'a, ast::Ident> { match self.token { - token::Ident(sid) if self.token.is_path_segment_keyword() => { + token::Ident(sid, _) if self.token.is_path_segment_keyword() => { self.bump(); Ok(sid) } @@ -2740,11 +2740,14 @@ impl<'a> Parser<'a> { } pub fn process_potential_macro_variable(&mut self) { - let ident = match self.token { + let (ident, is_raw) = match self.token { token::Dollar if self.span.ctxt() != syntax_pos::hygiene::SyntaxContext::empty() && self.look_ahead(1, |t| t.is_ident()) => { self.bump(); - let name = match self.token { token::Ident(ident) => ident, _ => unreachable!() }; + let name = match self.token { + token::Ident(ident, _) => ident, + _ => unreachable!() + }; let mut err = self.fatal(&format!("unknown macro variable `{}`", name)); err.span_label(self.span, "unknown macro variable"); err.emit(); @@ -2753,13 +2756,13 @@ impl<'a> Parser<'a> { token::Interpolated(ref nt) => { self.meta_var_span = Some(self.span); match nt.0 { - token::NtIdent(ident) => ident, + token::NtIdent(ident, is_raw) => (ident, is_raw), _ => return, } } _ => return, }; - self.token = token::Ident(ident.node); + self.token = token::Ident(ident.node, is_raw); self.span = ident.span; } @@ -4245,7 +4248,7 @@ impl<'a> Parser<'a> { -> PResult<'a, Option<P<Item>>> { let token_lo = self.span; let (ident, def) = match self.token { - token::Ident(ident) if ident.name == keywords::Macro.name() => { + token::Ident(ident, false) if ident.name == keywords::Macro.name() => { self.bump(); let ident = self.parse_ident()?; let tokens = if self.check(&token::OpenDelim(token::Brace)) { @@ -4273,7 +4276,7 @@ impl<'a> Parser<'a> { (ident, ast::MacroDef { tokens: tokens.into(), legacy: false }) } - token::Ident(ident) if ident.name == "macro_rules" && + token::Ident(ident, _) if ident.name == "macro_rules" && self.look_ahead(1, |t| *t == token::Not) => { let prev_span = self.prev_span; self.complain_if_pub_macro(&vis.node, prev_span); @@ -5078,7 +5081,9 @@ impl<'a> Parser<'a> { fn parse_self_arg(&mut self) -> PResult<'a, Option<Arg>> { let expect_ident = |this: &mut Self| match this.token { // Preserve hygienic context. - token::Ident(ident) => { let sp = this.span; this.bump(); codemap::respan(sp, ident) } + token::Ident(ident, _) => { + let sp = this.span; this.bump(); codemap::respan(sp, ident) + } _ => unreachable!() }; let isolated_self = |this: &mut Self, n| { @@ -5375,7 +5380,7 @@ impl<'a> Parser<'a> { VisibilityKind::Inherited => Ok(()), _ => { let is_macro_rules: bool = match self.token { - token::Ident(sid) => sid.name == Symbol::intern("macro_rules"), + token::Ident(sid, _) => sid.name == Symbol::intern("macro_rules"), _ => false, }; if is_macro_rules { @@ -7016,7 +7021,7 @@ impl<'a> Parser<'a> { fn parse_rename(&mut self) -> PResult<'a, Option<Ident>> { if self.eat_keyword(keywords::As) { match self.token { - token::Ident(ident) if ident.name == keywords::Underscore.name() => { + token::Ident(ident, false) if ident.name == keywords::Underscore.name() => { self.bump(); // `_` Ok(Some(Ident { name: ident.name.gensymed(), ..ident })) } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 4ada9e20f2c..7798a7a77ee 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -91,8 +91,8 @@ impl Lit { } } -fn ident_can_begin_expr(ident: ast::Ident) -> bool { - let ident_token: Token = Ident(ident); +fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool { + let ident_token: Token = Ident(ident, is_raw); !ident_token.is_reserved_ident() || ident_token.is_path_segment_keyword() || @@ -116,8 +116,8 @@ fn ident_can_begin_expr(ident: ast::Ident) -> bool { ].contains(&ident.name) } -fn ident_can_begin_type(ident: ast::Ident) -> bool { - let ident_token: Token = Ident(ident); +fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool { + let ident_token: Token = Ident(ident, is_raw); !ident_token.is_reserved_ident() || ident_token.is_path_segment_keyword() || @@ -132,6 +132,37 @@ fn ident_can_begin_type(ident: ast::Ident) -> bool { ].contains(&ident.name) } +pub fn is_path_segment_keyword(id: ast::Ident) -> bool { + id.name == keywords::Super.name() || + id.name == keywords::SelfValue.name() || + id.name == keywords::SelfType.name() || + id.name == keywords::Extern.name() || + id.name == keywords::Crate.name() || + id.name == keywords::CrateRoot.name() || + id.name == keywords::DollarCrate.name() +} + +// Returns true for reserved identifiers used internally for elided lifetimes, +// unnamed method parameters, crate root module, error recovery etc. +pub fn is_special_ident(id: ast::Ident) -> bool { + id.name <= keywords::Underscore.name() +} + +/// Returns `true` if the token is a keyword used in the language. +pub fn is_used_keyword(id: ast::Ident) -> bool { + id.name >= keywords::As.name() && id.name <= keywords::While.name() +} + +/// Returns `true` if the token is a keyword reserved for possible future use. +pub fn is_unused_keyword(id: ast::Ident) -> bool { + id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name() +} + +/// Returns `true` if the token is either a special identifier or a keyword. +pub fn is_reserved_ident(id: ast::Ident) -> bool { + is_special_ident(id) || is_used_keyword(id) || is_unused_keyword(id) +} + #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug)] pub enum Token { /* Expression-operator symbols. */ @@ -175,7 +206,7 @@ pub enum Token { Literal(Lit, Option<ast::Name>), /* Name components */ - Ident(ast::Ident), + Ident(ast::Ident, /* is_raw */ bool), Lifetime(ast::Ident), // The `LazyTokenStream` is a pure function of the `Nonterminal`, @@ -203,6 +234,11 @@ impl Token { Token::Interpolated(Lrc::new((nt, LazyTokenStream::new()))) } + /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary. + pub fn from_ast_ident(ident: ast::Ident) -> Token { + Ident(ident, is_reserved_ident(ident) && !is_path_segment_keyword(ident)) + } + /// Returns `true` if the token starts with '>'. pub fn is_like_gt(&self) -> bool { match *self { @@ -214,7 +250,8 @@ impl Token { /// Returns `true` if the token can appear at the start of an expression. pub fn can_begin_expr(&self) -> bool { match *self { - Ident(ident) => ident_can_begin_expr(ident), // value name or keyword + Ident(ident, is_raw) => + ident_can_begin_expr(ident, is_raw), // value name or keyword OpenDelim(..) | // tuple, array or block Literal(..) | // literal Not | // operator not @@ -239,7 +276,8 @@ impl Token { /// Returns `true` if the token can appear at the start of a type. pub fn can_begin_type(&self) -> bool { match *self { - Ident(ident) => ident_can_begin_type(ident), // type name or keyword + Ident(ident, is_raw) => + ident_can_begin_type(ident, is_raw), // type name or keyword OpenDelim(Paren) | // tuple OpenDelim(Bracket) | // array Not | // never @@ -272,11 +310,11 @@ impl Token { } } - pub fn ident(&self) -> Option<ast::Ident> { + pub fn ident(&self) -> Option<(ast::Ident, bool)> { match *self { - Ident(ident) => Some(ident), + Ident(ident, is_raw) => Some((ident, is_raw)), Interpolated(ref nt) => match nt.0 { - NtIdent(ident) => Some(ident.node), + NtIdent(ident, is_raw) => Some((ident.node, is_raw)), _ => None, }, _ => None, @@ -351,19 +389,13 @@ impl Token { /// Returns `true` if the token is a given keyword, `kw`. pub fn is_keyword(&self, kw: keywords::Keyword) -> bool { - self.ident().map(|ident| ident.name == kw.name()).unwrap_or(false) + self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false) } pub fn is_path_segment_keyword(&self) -> bool { match self.ident() { - Some(id) => id.name == keywords::Super.name() || - id.name == keywords::SelfValue.name() || - id.name == keywords::SelfType.name() || - id.name == keywords::Extern.name() || - id.name == keywords::Crate.name() || - id.name == keywords::CrateRoot.name() || - id.name == keywords::DollarCrate.name(), - None => false, + Some((id, false)) => is_path_segment_keyword(id), + _ => false, } } @@ -371,7 +403,7 @@ impl Token { // unnamed method parameters, crate root module, error recovery etc. pub fn is_special_ident(&self) -> bool { match self.ident() { - Some(id) => id.name <= keywords::Underscore.name(), + Some((id, false)) => is_special_ident(id), _ => false, } } @@ -379,7 +411,7 @@ impl Token { /// Returns `true` if the token is a keyword used in the language. pub fn is_used_keyword(&self) -> bool { match self.ident() { - Some(id) => id.name >= keywords::As.name() && id.name <= keywords::While.name(), + Some((id, false)) => is_used_keyword(id), _ => false, } } @@ -387,7 +419,7 @@ impl Token { /// Returns `true` if the token is a keyword reserved for possible future use. pub fn is_unused_keyword(&self) -> bool { match self.ident() { - Some(id) => id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name(), + Some((id, false)) => is_unused_keyword(id), _ => false, } } @@ -460,7 +492,10 @@ impl Token { /// Returns `true` if the token is either a special identifier or a keyword. pub fn is_reserved_ident(&self) -> bool { - self.is_special_ident() || self.is_used_keyword() || self.is_unused_keyword() + match self.ident() { + Some((id, false)) => is_reserved_ident(id), + _ => false, + } } pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span) @@ -496,8 +531,8 @@ impl Token { Nonterminal::NtImplItem(ref item) => { tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span); } - Nonterminal::NtIdent(ident) => { - let token = Token::Ident(ident.node); + Nonterminal::NtIdent(ident, is_raw) => { + let token = Token::Ident(ident.node, is_raw); tokens = Some(TokenTree::Token(ident.span, token).into()); } Nonterminal::NtLifetime(lifetime) => { @@ -529,7 +564,7 @@ pub enum Nonterminal { NtPat(P<ast::Pat>), NtExpr(P<ast::Expr>), NtTy(P<ast::Ty>), - NtIdent(ast::SpannedIdent), + NtIdent(ast::SpannedIdent, /* is_raw */ bool), /// Stuff inside brackets for attributes NtMeta(ast::MetaItem), NtPath(ast::Path), |
