diff options
| author | Jeffrey Seyfried <jeffrey.seyfried@gmail.com> | 2016-11-16 08:21:52 +0000 |
|---|---|---|
| committer | Jeffrey Seyfried <jeffrey.seyfried@gmail.com> | 2016-11-20 23:40:20 +0000 |
| commit | d2f8fb0a0a9dd98ea9d6a01620f1a21f425236c0 (patch) | |
| tree | 941a81af819b4ff81b47fd0f50241477e9090d2d /src/libsyntax/parse | |
| parent | f177a00ac92b871add90ca559a8591c4647a3c7e (diff) | |
| download | rust-d2f8fb0a0a9dd98ea9d6a01620f1a21f425236c0.tar.gz rust-d2f8fb0a0a9dd98ea9d6a01620f1a21f425236c0.zip | |
Move `syntax::util::interner` -> `syntax::symbol`, cleanup.
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 67 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 8 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 11 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 268 |
4 files changed, 45 insertions, 309 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index cf48c445c80..7048be1478b 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -8,13 +8,14 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use ast; +use ast::{self, Ident}; use syntax_pos::{self, BytePos, CharPos, Pos, Span}; use codemap::CodeMap; use errors::{FatalError, Handler, DiagnosticBuilder}; use ext::tt::transcribe::tt_next_token; -use parse::token::{self, keywords, str_to_ident}; +use parse::token; use str::char_at; +use symbol::{Symbol, keywords}; use rustc_unicode::property::Pattern_White_Space; use std::borrow::Cow; @@ -350,13 +351,13 @@ impl<'a> StringReader<'a> { /// single-byte delimiter). pub fn name_from(&self, start: BytePos) -> ast::Name { debug!("taking an ident from {:?} to {:?}", start, self.pos); - self.with_str_from(start, token::intern) + self.with_str_from(start, Symbol::intern) } /// As name_from, with an explicit endpoint. pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { debug!("taking an ident from {:?} to {:?}", start, end); - self.with_str_from_to(start, end, token::intern) + self.with_str_from_to(start, end, Symbol::intern) } /// Calls `f` with a string slice of the source text spanning from `start` @@ -492,7 +493,7 @@ impl<'a> StringReader<'a> { if string == "_" { None } else { - Some(token::intern(string)) + Some(Symbol::intern(string)) } }) } @@ -540,7 +541,7 @@ impl<'a> StringReader<'a> { self.with_str_from(start_bpos, |string| { // comments with only more "/"s are not doc comments let tok = if is_doc_comment(string) { - token::DocComment(token::intern(string)) + token::DocComment(Symbol::intern(string)) } else { token::Comment }; @@ -669,7 +670,7 @@ impl<'a> StringReader<'a> { } else { string.into() }; - token::DocComment(token::intern(&string[..])) + token::DocComment(Symbol::intern(&string[..])) } else { token::Comment }; @@ -758,7 +759,7 @@ impl<'a> StringReader<'a> { self.err_span_(start_bpos, self.pos, "no valid digits found for number"); - return token::Integer(token::intern("0")); + return token::Integer(Symbol::intern("0")); } // might be a float, but don't be greedy if this is actually an @@ -1097,7 +1098,7 @@ impl<'a> StringReader<'a> { token::Underscore } else { // FIXME: perform NFKC normalization here. (Issue #2253) - token::Ident(str_to_ident(string)) + token::Ident(Ident::from_str(string)) } })); } @@ -1277,13 +1278,13 @@ impl<'a> StringReader<'a> { // expansion purposes. See #12512 for the gory details of why // this is necessary. let ident = self.with_str_from(start, |lifetime_name| { - str_to_ident(&format!("'{}", lifetime_name)) + Ident::from_str(&format!("'{}", lifetime_name)) }); // Conjure up a "keyword checking ident" to make sure that // the lifetime name is not a keyword. let keyword_checking_ident = self.with_str_from(start, |lifetime_name| { - str_to_ident(lifetime_name) + Ident::from_str(lifetime_name) }); let keyword_checking_token = &token::Ident(keyword_checking_ident); let last_bpos = self.pos; @@ -1310,7 +1311,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start) } else { - token::intern("0") + Symbol::intern("0") }; self.bump(); // advance ch past token let suffix = self.scan_optional_raw_name(); @@ -1352,7 +1353,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start_bpos + BytePos(1)) } else { - token::intern("??") + Symbol::intern("??") }; self.bump(); let suffix = self.scan_optional_raw_name(); @@ -1424,7 +1425,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from_to(content_start_bpos, content_end_bpos) } else { - token::intern("??") + Symbol::intern("??") }; let suffix = self.scan_optional_raw_name(); return Ok(token::Literal(token::StrRaw(id, hash_count), suffix)); @@ -1551,7 +1552,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start) } else { - token::intern("?") + Symbol::intern("?") }; self.bump(); // advance ch past token return token::Byte(id); @@ -1584,7 +1585,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start) } else { - token::intern("??") + Symbol::intern("??") }; self.bump(); return token::ByteStr(id); @@ -1700,11 +1701,11 @@ fn ident_continue(c: Option<char>) -> bool { mod tests { use super::*; + use ast::Ident; use syntax_pos::{BytePos, Span, NO_EXPANSION}; use codemap::CodeMap; use errors; use parse::token; - use parse::token::str_to_ident; use std::io; use std::rc::Rc; @@ -1732,7 +1733,7 @@ mod tests { &sh, "/* my source file */ fn main() { println!(\"zebra\"); }\n" .to_string()); - let id = str_to_ident("fn"); + let id = Ident::from_str("fn"); assert_eq!(string_reader.next_token().tok, token::Comment); assert_eq!(string_reader.next_token().tok, token::Whitespace); let tok1 = string_reader.next_token(); @@ -1813,7 +1814,7 @@ mod tests { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok, - token::Literal(token::Char(token::intern("a")), None)); + token::Literal(token::Char(Symbol::intern("a")), None)); } #[test] @@ -1821,7 +1822,7 @@ mod tests { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok, - token::Literal(token::Char(token::intern(" ")), None)); + token::Literal(token::Char(Symbol::intern(" ")), None)); } #[test] @@ -1829,7 +1830,7 @@ mod tests { let cm = Rc::new(CodeMap::new()); let sh = mk_sh(cm.clone()); assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok, - token::Literal(token::Char(token::intern("\\n")), None)); + token::Literal(token::Char(Symbol::intern("\\n")), None)); } #[test] @@ -1847,7 +1848,7 @@ mod tests { assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()) .next_token() .tok, - token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None)); + token::Literal(token::StrRaw(Symol::intern("\"#a\\b\x00c\""), 3), None)); } #[test] @@ -1857,11 +1858,11 @@ mod tests { macro_rules! test { ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(token::intern($tok_contents)), - Some(token::intern("suffix")))); + token::Literal(token::$tok_type(Symbol::intern($tok_contents)), + Some(Symbol::intern("suffix")))); // with a whitespace separator: assert_eq!(setup(&cm, &sh, format!("{} suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(token::intern($tok_contents)), + token::Literal(token::$tok_type(Symbol::intern($tok_contents)), None)); }} } @@ -1877,14 +1878,14 @@ mod tests { test!("1.0e10", Float, "1.0e10"); assert_eq!(setup(&cm, &sh, "2us".to_string()).next_token().tok, - token::Literal(token::Integer(token::intern("2")), - Some(token::intern("us")))); + token::Literal(token::Integer(Symbol::intern("2")), + Some(Symbol::intern("us")))); assert_eq!(setup(&cm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::StrRaw(token::intern("raw"), 3), - Some(token::intern("suffix")))); + token::Literal(token::StrRaw(Symbol::intern("raw"), 3), + Some(Symbol::intern("suffix")))); assert_eq!(setup(&cm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::ByteStrRaw(token::intern("raw"), 3), - Some(token::intern("suffix")))); + token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3), + Some(Symbol::intern("suffix")))); } #[test] @@ -1904,7 +1905,7 @@ mod tests { _ => panic!("expected a comment!"), } assert_eq!(lexer.next_token().tok, - token::Literal(token::Char(token::intern("a")), None)); + token::Literal(token::Char(Symbol::intern("a")), None)); } #[test] @@ -1917,6 +1918,6 @@ mod tests { assert_eq!(comment.sp, ::syntax_pos::mk_sp(BytePos(0), BytePos(7))); assert_eq!(lexer.next_token().tok, token::Whitespace); assert_eq!(lexer.next_token().tok, - token::DocComment(token::intern("/// test"))); + token::DocComment(Symbol::intern("/// test"))); } } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 19e8b711ba4..a44f78b3c3a 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -16,9 +16,9 @@ use syntax_pos::{self, Span, FileMap}; use errors::{Handler, ColorConfig, DiagnosticBuilder}; use feature_gate::UnstableFeatures; use parse::parser::Parser; -use parse::token::InternedString; use ptr::P; use str::char_at; +use symbol::{self, InternedString}; use tokenstream; use std::cell::RefCell; @@ -372,7 +372,7 @@ fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { s[1..].chars().all(|c| '0' <= c && c <= '9') } -fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>, +fn filtered_float_lit(data: InternedString, suffix: Option<&str>, sd: &Handler, sp: Span) -> ast::LitKind { debug!("filtered_float_lit: {}, {:?}", data, suffix); match suffix.as_ref().map(|s| &**s) { @@ -400,7 +400,7 @@ pub fn float_lit(s: &str, suffix: Option<InternedString>, debug!("float_lit: {:?}, {:?}", s, suffix); // FIXME #2252: bounds checking float literals is deferred until trans let s = s.chars().filter(|&c| c != '_').collect::<String>(); - let data = token::intern_and_get_ident(&s); + let data = symbol::intern_and_get_ident(&s); filtered_float_lit(data, suffix.as_ref().map(|s| &**s), sd, sp) } @@ -530,7 +530,7 @@ pub fn integer_lit(s: &str, 2 => sd.span_err(sp, "binary float literal is not supported"), _ => () } - let ident = token::intern_and_get_ident(&s); + let ident = symbol::intern_and_get_ident(&s); return filtered_float_lit(ident, Some(&suf), sd, sp) } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 98ce00c7d38..13c701795a8 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -48,13 +48,14 @@ use parse::classify; use parse::common::SeqSep; use parse::lexer::{Reader, TokenAndSpan}; use parse::obsolete::ObsoleteSyntax; -use parse::token::{self, intern, keywords, MatchNt, SubstNt, InternedString}; +use parse::token::{self, MatchNt, SubstNt}; use parse::{new_sub_parser_from_file, ParseSess}; use util::parser::{AssocOp, Fixity}; use print::pprust; use ptr::P; use parse::PResult; use tokenstream::{self, Delimited, SequenceRepetition, TokenTree}; +use symbol::{self, Symbol, keywords, InternedString}; use util::ThinVec; use std::collections::HashSet; @@ -1537,13 +1538,13 @@ impl<'a> Parser<'a> { token::Str_(s) => { (true, - LitKind::Str(token::intern_and_get_ident(&parse::str_lit(&s.as_str())), + LitKind::Str(symbol::intern_and_get_ident(&parse::str_lit(&s.as_str())), ast::StrStyle::Cooked)) } token::StrRaw(s, n) => { (true, LitKind::Str( - token::intern_and_get_ident(&parse::raw_str_lit(&s.as_str())), + symbol::intern_and_get_ident(&parse::raw_str_lit(&s.as_str())), ast::StrStyle::Raw(n))) } token::ByteStr(i) => @@ -2627,7 +2628,7 @@ impl<'a> Parser<'a> { }))); } else if self.token.is_keyword(keywords::Crate) { let ident = match self.token { - token::Ident(id) => ast::Ident { name: token::intern("$crate"), ..id }, + token::Ident(id) => ast::Ident { name: Symbol::intern("$crate"), ..id }, _ => unreachable!(), }; self.bump(); @@ -4835,7 +4836,7 @@ impl<'a> Parser<'a> { Visibility::Inherited => (), _ => { let is_macro_rules: bool = match self.token { - token::Ident(sid) => sid.name == intern("macro_rules"), + token::Ident(sid) => sid.name == Symbol::intern("macro_rules"), _ => false, }; if is_macro_rules { diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 4aaa028ef75..8ac39dd462e 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -16,13 +16,10 @@ pub use self::Token::*; use ast::{self}; use ptr::P; -use util::interner::Interner; +use symbol::keywords; use tokenstream; -use serialize::{Decodable, Decoder, Encodable, Encoder}; -use std::cell::RefCell; use std::fmt; -use std::ops::Deref; use std::rc::Rc; #[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)] @@ -335,266 +332,3 @@ impl fmt::Debug for Nonterminal { } } } - -// In this macro, there is the requirement that the name (the number) must be monotonically -// increasing by one in the special identifiers, starting at 0; the same holds for the keywords, -// except starting from the next number instead of zero. -macro_rules! declare_keywords {( - $( ($index: expr, $konst: ident, $string: expr) )* -) => { - pub mod keywords { - use ast; - #[derive(Clone, Copy, PartialEq, Eq)] - pub struct Keyword { - ident: ast::Ident, - } - impl Keyword { - #[inline] pub fn ident(self) -> ast::Ident { self.ident } - #[inline] pub fn name(self) -> ast::Name { self.ident.name } - } - $( - #[allow(non_upper_case_globals)] - pub const $konst: Keyword = Keyword { - ident: ast::Ident::with_empty_ctxt(ast::Name($index)) - }; - )* - } - - fn mk_fresh_ident_interner() -> IdentInterner { - Interner::prefill(&[$($string,)*]) - } -}} - -// NB: leaving holes in the ident table is bad! a different ident will get -// interned with the id from the hole, but it will be between the min and max -// of the reserved words, and thus tagged as "reserved". -// After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`, -// this should be rarely necessary though if the keywords are kept in alphabetic order. -declare_keywords! { - // Invalid identifier - (0, Invalid, "") - - // Strict keywords used in the language. - (1, As, "as") - (2, Box, "box") - (3, Break, "break") - (4, Const, "const") - (5, Continue, "continue") - (6, Crate, "crate") - (7, Else, "else") - (8, Enum, "enum") - (9, Extern, "extern") - (10, False, "false") - (11, Fn, "fn") - (12, For, "for") - (13, If, "if") - (14, Impl, "impl") - (15, In, "in") - (16, Let, "let") - (17, Loop, "loop") - (18, Match, "match") - (19, Mod, "mod") - (20, Move, "move") - (21, Mut, "mut") - (22, Pub, "pub") - (23, Ref, "ref") - (24, Return, "return") - (25, SelfValue, "self") - (26, SelfType, "Self") - (27, Static, "static") - (28, Struct, "struct") - (29, Super, "super") - (30, Trait, "trait") - (31, True, "true") - (32, Type, "type") - (33, Unsafe, "unsafe") - (34, Use, "use") - (35, Where, "where") - (36, While, "while") - - // Keywords reserved for future use. - (37, Abstract, "abstract") - (38, Alignof, "alignof") - (39, Become, "become") - (40, Do, "do") - (41, Final, "final") - (42, Macro, "macro") - (43, Offsetof, "offsetof") - (44, Override, "override") - (45, Priv, "priv") - (46, Proc, "proc") - (47, Pure, "pure") - (48, Sizeof, "sizeof") - (49, Typeof, "typeof") - (50, Unsized, "unsized") - (51, Virtual, "virtual") - (52, Yield, "yield") - - // Weak keywords, have special meaning only in specific contexts. - (53, Default, "default") - (54, StaticLifetime, "'static") - (55, Union, "union") -} - -// looks like we can get rid of this completely... -pub type IdentInterner = Interner; - -// if an interner exists in TLS, return it. Otherwise, prepare a -// fresh one. -// FIXME(eddyb) #8726 This should probably use a thread-local reference. -pub fn with_ident_interner<T, F: FnOnce(&mut IdentInterner) -> T>(f: F) -> T { - thread_local!(static KEY: RefCell<IdentInterner> = { - RefCell::new(mk_fresh_ident_interner()) - }); - KEY.with(|interner| f(&mut *interner.borrow_mut())) -} - -/// Reset the ident interner to its initial state. -pub fn reset_ident_interner() { - with_ident_interner(|interner| *interner = mk_fresh_ident_interner()); -} - -/// Represents a string stored in the thread-local interner. Because the -/// interner lives for the life of the thread, this can be safely treated as an -/// immortal string, as long as it never crosses between threads. -/// -/// FIXME(pcwalton): You must be careful about what you do in the destructors -/// of objects stored in TLS, because they may run after the interner is -/// destroyed. In particular, they must not access string contents. This can -/// be fixed in the future by just leaking all strings until thread death -/// somehow. -#[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)] -pub struct InternedString { - string: Rc<str>, -} - -impl InternedString { - #[inline] - pub fn new(string: &'static str) -> InternedString { - InternedString { - string: Rc::__from_str(string), - } - } - - #[inline] - pub fn new_from_name(name: ast::Name) -> InternedString { - with_ident_interner(|interner| InternedString { string: interner.get(name) }) - } -} - -impl Deref for InternedString { - type Target = str; - - fn deref(&self) -> &str { &self.string } -} - -impl fmt::Debug for InternedString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Debug::fmt(&self.string, f) - } -} - -impl fmt::Display for InternedString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(&self.string, f) - } -} - -impl<'a> PartialEq<&'a str> for InternedString { - #[inline(always)] - fn eq(&self, other: & &'a str) -> bool { - PartialEq::eq(&self.string[..], *other) - } - #[inline(always)] - fn ne(&self, other: & &'a str) -> bool { - PartialEq::ne(&self.string[..], *other) - } -} - -impl<'a> PartialEq<InternedString> for &'a str { - #[inline(always)] - fn eq(&self, other: &InternedString) -> bool { - PartialEq::eq(*self, &other.string[..]) - } - #[inline(always)] - fn ne(&self, other: &InternedString) -> bool { - PartialEq::ne(*self, &other.string[..]) - } -} - -impl PartialEq<str> for InternedString { - #[inline(always)] - fn eq(&self, other: &str) -> bool { - PartialEq::eq(&self.string[..], other) - } - #[inline(always)] - fn ne(&self, other: &str) -> bool { - PartialEq::ne(&self.string[..], other) - } -} - -impl PartialEq<InternedString> for str { - #[inline(always)] - fn eq(&self, other: &InternedString) -> bool { - PartialEq::eq(self, &other.string[..]) - } - #[inline(always)] - fn ne(&self, other: &InternedString) -> bool { - PartialEq::ne(self, &other.string[..]) - } -} - -impl Decodable for InternedString { - fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { - Ok(intern(&d.read_str()?).as_str()) - } -} - -impl Encodable for InternedString { - fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_str(&self.string) - } -} - -/// Interns and returns the string contents of an identifier, using the -/// thread-local interner. -#[inline] -pub fn intern_and_get_ident(s: &str) -> InternedString { - intern(s).as_str() -} - -/// Maps a string to its interned representation. -#[inline] -pub fn intern(s: &str) -> ast::Name { - with_ident_interner(|interner| interner.intern(s)) -} - -/// gensym's a new usize, using the current interner. -#[inline] -pub fn gensym(s: &str) -> ast::Name { - with_ident_interner(|interner| interner.gensym(s)) -} - -/// Maps a string to an identifier with an empty syntax context. -#[inline] -pub fn str_to_ident(s: &str) -> ast::Ident { - ast::Ident::with_empty_ctxt(intern(s)) -} - -/// Maps a string to a gensym'ed identifier. -#[inline] -pub fn gensym_ident(s: &str) -> ast::Ident { - ast::Ident::with_empty_ctxt(gensym(s)) -} - -// create a fresh name that maps to the same string as the old one. -// note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src))); -// that is, that the new name and the old one are connected to ptr_eq strings. -pub fn fresh_name(src: ast::Ident) -> ast::Name { - with_ident_interner(|interner| interner.gensym_copy(src.name)) - // following: debug version. Could work in final except that it's incompatible with - // good error messages and uses of struct names in ambiguous could-be-binding - // locations. Also definitely destroys the guarantee given above about ptr_eq. - /*let num = rand::thread_rng().gen_uint_range(0,0xffff); - gensym(format!("{}_{}",ident_to_string(src),num))*/ -} |
