diff options
| author | John Clements <clements@racket-lang.org> | 2013-06-04 12:34:25 -0700 |
|---|---|---|
| committer | John Clements <clements@racket-lang.org> | 2013-06-05 12:01:38 -0700 |
| commit | 320359547126b1a317d49ed68102d1b6e8be5ca2 (patch) | |
| tree | 0acad4ebe7ddec98ed996a9603098533da2f9bfb /src/libsyntax/parse | |
| parent | ae02bf70e01f48dc00b82620ff121eedc9e7db7c (diff) | |
| download | rust-320359547126b1a317d49ed68102d1b6e8be5ca2.tar.gz rust-320359547126b1a317d49ed68102d1b6e8be5ca2.zip | |
interner just uses uints, not idents with syntax context
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 30 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 4 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 14 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 72 |
4 files changed, 71 insertions, 49 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index fe93b5b688f..7c6b2774d77 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -17,7 +17,7 @@ use diagnostic::span_handler; use ext::tt::transcribe::{tt_next_token}; use ext::tt::transcribe::{dup_tt_reader}; use parse::token; -use parse::token::{get_ident_interner}; +use parse::token::{get_ident_interner, str_to_ident}; use core::char; use core::either; @@ -275,7 +275,7 @@ fn consume_any_line_comment(rdr: @mut StringReader) // but comments with only more "/"s are not if !is_line_non_doc_comment(acc) { return Some(TokenAndSpan{ - tok: token::DOC_COMMENT(get_ident_interner().intern(acc)), + tok: token::DOC_COMMENT(str_to_ident(acc)), sp: codemap::mk_sp(start_bpos, rdr.pos) }); } @@ -329,7 +329,7 @@ fn consume_block_comment(rdr: @mut StringReader) // but comments with only "*"s between two "/"s are not if !is_block_non_doc_comment(acc) { return Some(TokenAndSpan{ - tok: token::DOC_COMMENT(get_ident_interner().intern(acc)), + tok: token::DOC_COMMENT(str_to_ident(acc)), sp: codemap::mk_sp(start_bpos, rdr.pos) }); } @@ -475,12 +475,12 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { if c == '3' && n == '2' { bump(rdr); bump(rdr); - return token::LIT_FLOAT(get_ident_interner().intern(num_str), + return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f32); } else if c == '6' && n == '4' { bump(rdr); bump(rdr); - return token::LIT_FLOAT(get_ident_interner().intern(num_str), + return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f64); /* FIXME (#2252): if this is out of range for either a 32-bit or 64-bit float, it won't be noticed till the @@ -492,9 +492,9 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { } if is_float { if is_machine_float { - return token::LIT_FLOAT(get_ident_interner().intern(num_str), ast::ty_f); + return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f); } - return token::LIT_FLOAT_UNSUFFIXED(get_ident_interner().intern(num_str)); + return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str)); } else { if str::len(num_str) == 0u { rdr.fatal(~"no valid digits found for number"); @@ -557,7 +557,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { let is_mod_name = c == ':' && nextch(rdr) == ':'; // FIXME: perform NFKC normalization here. (Issue #2253) - return token::IDENT(get_ident_interner().intern(accum_str), is_mod_name); + return token::IDENT(str_to_ident(accum_str), is_mod_name); } if is_dec_digit(c) { return scan_number(c, rdr); @@ -667,7 +667,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { lifetime_name.push_char(rdr.curr); bump(rdr); } - return token::LIFETIME(get_ident_interner().intern(lifetime_name)); + return token::LIFETIME(str_to_ident(lifetime_name)); } // Otherwise it is a character constant: @@ -740,7 +740,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { } } bump(rdr); - return token::LIT_STR(get_ident_interner().intern(accum_str)); + return token::LIT_STR(str_to_ident(accum_str)); } '-' => { if nextch(rdr) == '>' { @@ -784,7 +784,7 @@ mod test { use core::option::None; use diagnostic; use parse::token; - use parse::token::{get_ident_interner}; + use parse::token::{get_ident_interner, str_to_ident}; // represents a testing reader (incl. both reader and interner) struct Env { @@ -809,7 +809,7 @@ mod test { let Env {interner: ident_interner, string_reader} = setup(~"/* my source file */ \ fn main() { io::println(~\"zebra\"); }\n"); - let id = ident_interner.intern("fn"); + let id = str_to_ident("fn"); let tok1 = string_reader.next_token(); let tok2 = TokenAndSpan{ tok:token::IDENT(id, false), @@ -820,7 +820,7 @@ mod test { // read another token: let tok3 = string_reader.next_token(); let tok4 = TokenAndSpan{ - tok:token::IDENT(ident_interner.intern("main"), false), + tok:token::IDENT(str_to_ident("main"), false), sp:span {lo:BytePos(24),hi:BytePos(28),expn_info: None}}; assert_eq!(tok3,tok4); // the lparen is already read: @@ -839,7 +839,7 @@ mod test { // make the identifier by looking up the string in the interner fn mk_ident (env: Env, id: &str, is_mod_name: bool) -> token::Token { - token::IDENT (get_ident_interner().intern(id),is_mod_name) + token::IDENT (str_to_ident(id),is_mod_name) } #[test] fn doublecolonparsing () { @@ -898,7 +898,7 @@ mod test { let env = setup(~"'abc"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); - let id = get_ident_interner().intern("abc"); + let id = token::str_to_ident("abc"); assert_eq!(tok, token::LIFETIME(id)); } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 7ca2756c211..cfc2eaca241 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -348,8 +348,8 @@ mod test { use ast_util::new_ident; use abi; use parse::parser::Parser; - use parse::token::intern; - use util::parser_testing::{string_to_tts_and_sess,string_to_parser}; + use parse::token::{intern, str_to_ident}; + use util::parser_testing::{string_to_tts_and_sess, string_to_parser}; use util::parser_testing::{string_to_expr, string_to_item}; use util::parser_testing::{string_to_stmt}; diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 6260e7c460c..54845849ebb 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -85,7 +85,7 @@ use parse::obsolete::{ObsoleteLifetimeNotation, ObsoleteConstManagedPointer}; use parse::obsolete::{ObsoletePurity, ObsoleteStaticMethod}; use parse::obsolete::{ObsoleteConstItem, ObsoleteFixedLengthVectorType}; use parse::obsolete::{ObsoleteNamedExternModule, ObsoleteMultipleLocalDecl}; -use parse::token::{can_begin_expr, get_ident_interner, is_ident, is_ident_or_path}; +use parse::token::{can_begin_expr, get_ident_interner, ident_to_str, is_ident, is_ident_or_path}; use parse::token::{is_plain_ident, INTERPOLATED, keywords, special_idents, token_to_binop}; use parse::token; use parse::{new_sub_parser_from_file, next_node_id, ParseSess}; @@ -333,7 +333,7 @@ impl Parser { pub fn get_id(&self) -> node_id { next_node_id(self.sess) } pub fn id_to_str(&self, id: ident) -> @~str { - get_ident_interner().get(id) + get_ident_interner().get(id.name) } // is this one of the keywords that signals a closure type? @@ -3370,7 +3370,7 @@ impl Parser { } if fields.len() == 0 { self.fatal(fmt!("Unit-like struct should be written as `struct %s;`", - *get_ident_interner().get(class_name))); + *get_ident_interner().get(class_name.name))); } self.bump(); } else if *self.token == token::LPAREN { @@ -3582,7 +3582,7 @@ impl Parser { } fn push_mod_path(&self, id: ident, attrs: ~[ast::attribute]) { - let default_path = get_ident_interner().get(id); + let default_path = token::interner_get(id.name); let file_path = match ::attr::first_attr_value_str_by_name( attrs, "path") { @@ -3605,7 +3605,7 @@ impl Parser { let prefix = prefix.dir_path(); let mod_path_stack = &*self.mod_path_stack; let mod_path = Path(".").push_many(*mod_path_stack); - let default_path = *get_ident_interner().get(id) + ".rs"; + let default_path = *token::interner_get(id.name) + ".rs"; let file_path = match ::attr::first_attr_value_str_by_name( outer_attrs, "path") { Some(d) => { @@ -3980,7 +3980,7 @@ impl Parser { match *self.token { token::LIT_STR(s) => { self.bump(); - let the_string = self.id_to_str(s); + let the_string = ident_to_str(s); let mut words = ~[]; for str::each_word(*the_string) |s| { words.push(s) } let mut abis = AbiSet::empty(); @@ -4542,7 +4542,7 @@ impl Parser { match *self.token { token::LIT_STR(s) => { self.bump(); - self.id_to_str(s) + ident_to_str(s) } _ => self.fatal("expected string literal") } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index b5882a71dd3..400d52d5a52 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -11,6 +11,7 @@ use core::prelude::*; use ast; +use ast::Name; use ast_util; use parse::token; use util::interner::StrInterner; @@ -176,29 +177,29 @@ pub fn to_str(in: @ident_interner, t: &Token) -> ~str { u.to_str() + ast_util::uint_ty_to_str(t) } LIT_INT_UNSUFFIXED(i) => { i.to_str() } - LIT_FLOAT(s, t) => { - let mut body = copy *in.get(s); + LIT_FLOAT(ref s, t) => { + let mut body = copy *ident_to_str(*s); if body.ends_with(".") { body += "0"; // `10.f` is not a float literal } body + ast_util::float_ty_to_str(t) } - LIT_FLOAT_UNSUFFIXED(s) => { - let mut body = copy *in.get(s); + LIT_FLOAT_UNSUFFIXED(ref s) => { + let mut body = copy *ident_to_str(*s); if body.ends_with(".") { body += "0"; // `10.f` is not a float literal } body } - LIT_STR(s) => { ~"\"" + str::escape_default(*in.get(s)) + "\"" } + LIT_STR(ref s) => { ~"\"" + str::escape_default(*ident_to_str(*s)) + "\"" } /* Name components */ - IDENT(s, _) => copy *in.get(s), - LIFETIME(s) => fmt!("'%s", *in.get(s)), + IDENT(s, _) => copy *in.get(s.name), + LIFETIME(s) => fmt!("'%s", *in.get(s.name)), UNDERSCORE => ~"_", /* Other */ - DOC_COMMENT(s) => copy *in.get(s), + DOC_COMMENT(ref s) => copy *ident_to_str(*s), EOF => ~"<eof>", INTERPOLATED(ref nt) => { match nt { @@ -394,27 +395,22 @@ pub struct ident_interner { } impl ident_interner { - // I'm torn as to whether these should produce idents or - // just uints. - pub fn intern(&self, val: &str) -> ast::ident { - ast::ident { name: self.interner.intern(val), ctxt: 0 } + pub fn intern(&self, val: &str) -> Name { + self.interner.intern(val) } - pub fn gensym(&self, val: &str) -> ast::ident { - ast::ident { name: self.interner.gensym(val), ctxt: 0 } + pub fn gensym(&self, val: &str) -> Name { + self.interner.gensym(val) } - pub fn get(&self, idx: ast::ident) -> @~str { - self.interner.get(idx.name) + pub fn get(&self, idx: Name) -> @~str { + self.interner.get(idx) } + // is this really something that should be exposed? pub fn len(&self) -> uint { self.interner.len() } - pub fn find_equiv<Q:Hash + - IterBytes + - Equiv<@~str>>(&self, val: &Q) -> Option<ast::ident> { - match self.interner.find_equiv(val) { - Some(v) => Some(ast::ident { name: v, ctxt: 0 }), - None => None, - } + pub fn find_equiv<Q:Hash + IterBytes + Equiv<@~str>>(&self, val: &Q) + -> Option<Name> { + self.interner.find_equiv(val) } } @@ -532,9 +528,35 @@ pub fn mk_fake_ident_interner() -> @ident_interner { } // maps a string to its interned representation -pub fn intern(str : &str) -> uint { +pub fn intern(str : &str) -> Name { + let interner = get_ident_interner(); + interner.intern(str) +} + +// gensyms a new uint, using the current interner +pub fn gensym(str : &str) -> Name { let interner = get_ident_interner(); - interner.intern(str).name + interner.gensym(str) +} + +// map an interned representation back to a string +pub fn interner_get(name : Name) -> @~str { + get_ident_interner().get(name) +} + +// maps an identifier to the string that it corresponds to +pub fn ident_to_str(id : ast::ident) -> @~str { + interner_get(id.name) +} + +// maps a string to an identifier with an empty syntax context +pub fn str_to_ident(str : &str) -> ast::ident { + ast::new_ident(intern(str)) +} + +// maps a string to a gensym'ed identifier +pub fn gensym_ident(str : &str) -> ast::ident { + ast::new_ident(gensym(str)) } /** |
