| Field | Value | Date |
|---|---|---|
| author | Paul Stansifer <paul.stansifer@gmail.com> | 2012-07-18 16:18:02 -0700 |
| committer | Paul Stansifer <paul.stansifer@gmail.com> | 2012-08-22 14:59:25 -0700 |
| commit | 1153b5dcc86c3567b0a86e441938f05d4f2e295b | |
| tree | fdcbcea39abecb4ad1ea5145e62e8c013b05e930 /src/libsyntax/parse | |
| parent | 7317bf8792ebb3f27768109b7d574ee0806cc5e5 | |
intern identifiers
Diffstat (limited to 'src/libsyntax/parse')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/libsyntax/parse/attr.rs | 12 |
| -rw-r--r-- | src/libsyntax/parse/comments.rs | 2 |
| -rw-r--r-- | src/libsyntax/parse/common.rs | 10 |
| -rw-r--r-- | src/libsyntax/parse/eval.rs | 23 |
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 59 |
| -rw-r--r-- | src/libsyntax/parse/token.rs | 76 |
6 files changed, 123 insertions, 59 deletions
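
The change replaces string-based identifiers (`@~str`) with interned indices: an `ast::ident` becomes a small integer that keys into an interner table, so identifier comparisons are integer comparisons and the text is only recovered on demand (via `id_to_str` or the session interner). A minimal sketch of the idea follows, in modern Rust for readability; the names `Interner`, `intern`, and the `u32` index type are illustrative assumptions, not the 2012 API in the patch.

```rust
use std::collections::HashMap;

/// Toy string interner: identifiers become small integer indices into a
/// table of strings, so equality checks are integer comparisons and the
/// string itself is looked up only when needed (cf. `id_to_str` below).
#[derive(Default)]
struct Interner {
    map: HashMap<String, u32>, // string -> index
    vec: Vec<String>,          // index  -> string
}

impl Interner {
    /// Return the existing index for `s`, or assign the next free one.
    fn intern(&mut self, s: &str) -> u32 {
        if let Some(&id) = self.map.get(s) {
            return id;
        }
        let id = self.vec.len() as u32;
        self.map.insert(s.to_string(), id);
        self.vec.push(s.to_string());
        id
    }

    /// Recover the string for an index (the analogue of the patch's `get`).
    fn get(&self, id: u32) -> &str {
        &self.vec[id as usize]
    }
}

fn main() {
    let mut interner = Interner::default();
    let a = interner.intern("self");
    let b = interner.intern("self");
    assert_eq!(a, b);                 // identifier comparison is now an integer comparison
    assert_eq!(interner.get(a), "self");
}
```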
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index cbb6709d9c8..54b0e3388f7 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -66,7 +66,7 @@ impl parser: parser_attr {
           }
           token::DOC_COMMENT(s) => {
             let attr = ::attr::mk_sugared_doc_attr(
-                *self.get_str(s), self.span.lo, self.span.hi);
+                *self.id_to_str(s), self.span.lo, self.span.hi);
             if attr.node.style != ast::attr_outer {
               self.fatal(~"expected outer comment");
             }
@@ -128,7 +128,7 @@ impl parser: parser_attr {
           }
           token::DOC_COMMENT(s) => {
             let attr = ::attr::mk_sugared_doc_attr(
-                *self.get_str(s), self.span.lo, self.span.hi);
+                *self.id_to_str(s), self.span.lo, self.span.hi);
             self.bump();
             if attr.node.style == ast::attr_inner {
               inner_attrs += ~[attr];
@@ -145,22 +145,22 @@ impl parser: parser_attr {
 
     fn parse_meta_item() -> @ast::meta_item {
         let lo = self.span.lo;
-        let ident = self.parse_ident();
+        let name = *self.id_to_str(self.parse_ident());
         match self.token {
           token::EQ => {
             self.bump();
             let lit = self.parse_lit();
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_name_value(ident, lit));
+            return @spanned(lo, hi, ast::meta_name_value(name, lit));
           }
           token::LPAREN => {
             let inner_items = self.parse_meta_seq();
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_list(ident, inner_items));
+            return @spanned(lo, hi, ast::meta_list(name, inner_items));
           }
           _ => {
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_word(ident));
+            return @spanned(lo, hi, ast::meta_word(name));
           }
         }
     }
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index b10da4482e4..09edbda4335 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -276,7 +276,7 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                 srdr: io::Reader) ->
     {cmnts: ~[cmnt], lits: ~[lit]} {
     let src = @str::from_bytes(srdr.read_whole_stream());
-    let itr = parse::token::mk_ident_interner();
+    let itr = parse::token::mk_fake_ident_interner();
     let rdr = lexer::new_low_level_string_reader
         (span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr);
diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs
index 59dad16dc44..1d260268d3f 100644
--- a/src/libsyntax/parse/common.rs
+++ b/src/libsyntax/parse/common.rs
@@ -85,7 +85,7 @@ impl parser: parser_common {
     fn parse_ident() -> ast::ident {
         match copy self.token {
-          token::IDENT(i, _) => { self.bump(); return self.get_str(i); }
+          token::IDENT(i, _) => { self.bump(); return i; }
          token::INTERPOLATED(token::nt_ident(*)) => { self.bug(
              ~"ident interpolation not converted to real token"); }
          _ => { self.fatal(~"expected ident, found `"
@@ -110,6 +110,8 @@ impl parser: parser_common {
         return if self.token == tok { self.bump(); true } else { false };
     }
 
+    // Storing keywords as interned idents instead of strings would be nifty.
+    // A sanity check that the word we are asking for is a known keyword
     fn require_keyword(word: ~str) {
         if !self.keywords.contains_key_ref(&word) {
@@ -119,7 +121,7 @@ impl parser: parser_common {
 
     fn token_is_word(word: ~str, ++tok: token::token) -> bool {
         match tok {
-          token::IDENT(sid, false) => { word == *self.get_str(sid) }
+          token::IDENT(sid, false) => { *self.id_to_str(sid) == word }
          _ => { false }
        }
     }
@@ -136,7 +138,7 @@ impl parser: parser_common {
     fn is_any_keyword(tok: token::token) -> bool {
         match tok {
           token::IDENT(sid, false) => {
-            self.keywords.contains_key_ref(self.get_str(sid))
+            self.keywords.contains_key_ref(self.id_to_str(sid))
           }
           _ => false
         }
@@ -148,7 +150,7 @@ impl parser: parser_common {
         let mut bump = false;
         let val = match self.token {
           token::IDENT(sid, false) => {
-            if word == *self.get_str(sid) {
+            if word == *self.id_to_str(sid) {
                 bump = true;
                 true
             } else { false }
diff --git a/src/libsyntax/parse/eval.rs b/src/libsyntax/parse/eval.rs
index 6b0112922a5..60d2fadb04c 100644
--- a/src/libsyntax/parse/eval.rs
+++ b/src/libsyntax/parse/eval.rs
@@ -78,10 +78,10 @@ fn parse_companion_mod(cx: ctx, prefix: ~str, suffix: option<~str>)
     }
 }
 
-fn cdir_path_opt(id: ast::ident, attrs: ~[ast::attribute]) -> @~str {
+fn cdir_path_opt(default: ~str, attrs: ~[ast::attribute]) -> ~str {
     match ::attr::first_attr_value_str_by_name(attrs, ~"path") {
-      some(d) => return d,
-      none => return id
+      some(d) => d,
+      none => default
     }
 }
 
@@ -90,11 +90,12 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: ~str,
                         &items: ~[@ast::item]) {
     match cdir.node {
       ast::cdir_src_mod(id, attrs) => {
-        let file_path = cdir_path_opt(@(*id + ~".rs"), attrs);
+        let file_path = cdir_path_opt((cx.sess.interner.get(id) + ~".rs"),
+                                      attrs);
         let full_path =
-            if path::path_is_absolute(*file_path) {
-                *file_path
-            } else { prefix + path::path_sep() + *file_path };
+            if path::path_is_absolute(file_path) {
+                file_path
+            } else { prefix + path::path_sep() + file_path };
         let (p0, r0) =
             new_parser_etc_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE);
         let inner_attrs = p0.parse_inner_attrs_and_next();
@@ -111,11 +112,11 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: ~str,
         vec::push(items, i);
       }
       ast::cdir_dir_mod(id, cdirs, attrs) => {
-        let path = cdir_path_opt(id, attrs);
+        let path = cdir_path_opt(*cx.sess.interner.get(id), attrs);
         let full_path =
-            if path::path_is_absolute(*path) {
-                *path
-            } else { prefix + path::path_sep() + *path };
+            if path::path_is_absolute(path) {
+                path
+            } else { prefix + path::path_sep() + path };
         let (m0, a0) = eval_crate_directives_to_mod(
             cx, cdirs, full_path, none);
         let i =
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 648ec3e60db..fea79309c21 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -6,7 +6,7 @@ import std::map::{hashmap, str_hash};
 import token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident,
                INTERPOLATED};
 import codemap::{span,fss_none};
-import util::interner;
+import util::interner::interner;
 import ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
 import lexer::reader;
 import prec::{as_prec, token_to_binop};
@@ -193,12 +193,14 @@ struct parser {
     let mut restriction: restriction;
     let mut quote_depth: uint; // not (yet) related to the quasiquoter
     let reader: reader;
+    let interner: interner<@~str>;
     let keywords: hashmap<~str, ()>;
     let restricted_keywords: hashmap<~str, ()>;
 
     new(sess: parse_sess, cfg: ast::crate_cfg, +rdr: reader,
         ftype: file_type) {
         self.reader <- rdr;
+        self.interner = self.reader.interner();
         let tok0 = self.reader.next_token();
         let span0 = tok0.sp;
         self.sess = sess;
@@ -268,11 +270,10 @@ struct parser {
     fn warn(m: ~str) {
         self.sess.span_diagnostic.span_warn(copy self.span, m)
     }
-    pure fn get_str(i: token::str_num) -> @~str {
-        self.reader.interner().get(i)
-    }
     fn get_id() -> node_id { next_node_id(self.sess) }
 
+    pure fn id_to_str(id: ident) -> @~str { self.sess.interner.get(id) }
+
     fn parse_ty_fn(purity: ast::purity) -> ty_ {
         let proto, bounds;
         if self.eat_keyword(~"extern") {
@@ -398,9 +399,9 @@ struct parser {
         }
     }
 
-    fn region_from_name(s: option<@~str>) -> @region {
+    fn region_from_name(s: option<ident>) -> @region {
         let r = match s {
-          some (string) => re_named(string),
+          some (id) => re_named(id),
           none => re_anon
         };
 
@@ -414,8 +415,7 @@ struct parser {
         match copy self.token {
           token::IDENT(sid, _) => {
             self.bump();
-            let n = self.get_str(sid);
-            self.region_from_name(some(n))
+            self.region_from_name(some(sid))
           }
           _ => {
             self.region_from_name(none)
@@ -430,7 +430,7 @@ struct parser {
           token::IDENT(sid, _) => {
             if self.look_ahead(1u) == token::BINOP(token::SLASH) {
                 self.bump(); self.bump();
-                some(self.get_str(sid))
+                some(sid)
             } else {
                 none
             }
@@ -583,7 +583,7 @@ struct parser {
                 let name = self.parse_value_ident();
                 self.bump();
                 name
-            } else { @~"" }
+            } else { token::special_idents::invalid }
         };
 
         let t = self.parse_ty(false);
@@ -678,10 +678,10 @@ struct parser {
           token::LIT_INT(i, it) => lit_int(i, it),
           token::LIT_UINT(u, ut) => lit_uint(u, ut),
           token::LIT_INT_UNSUFFIXED(i) => lit_int_unsuffixed(i),
-          token::LIT_FLOAT(s, ft) => lit_float(self.get_str(s), ft),
-          token::LIT_STR(s) => lit_str(self.get_str(s)),
-          token::LPAREN => { self.expect(token::RPAREN); lit_nil }
-          _ => self.unexpected_last(tok)
+          token::LIT_FLOAT(s, ft) => lit_float(self.id_to_str(s), ft),
+          token::LIT_STR(s) => lit_str(self.id_to_str(s)),
+          token::LPAREN => { self.expect(token::RPAREN); lit_nil },
+          _ => { self.unexpected_last(tok); }
         }
     }
 
@@ -1140,8 +1140,7 @@ struct parser {
                 self.parse_seq_to_gt(some(token::COMMA),
                                      |p| p.parse_ty(false))
             } else { ~[] };
-            e = self.mk_pexpr(lo, hi, expr_field(self.to_expr(e),
-                                                 self.get_str(i),
+            e = self.mk_pexpr(lo, hi, expr_field(self.to_expr(e), i,
                                                  tys));
           }
           _ => self.unexpected()
@@ -2123,9 +2122,6 @@ struct parser {
     }
 
     fn expr_is_complete(e: pexpr) -> bool {
-        log(debug, (~"expr_is_complete", self.restriction,
-                    print::pprust::expr_to_str(*e),
-                    classify::expr_requires_semi_to_be_stmt(*e)));
         return self.restriction == RESTRICT_STMT_EXPR &&
             !classify::expr_requires_semi_to_be_stmt(*e);
     }
@@ -2306,8 +2302,9 @@ struct parser {
 
     fn is_self_ident() -> bool {
         match self.token {
-          token::IDENT(sid, false) if ~"self" == *self.get_str(sid) => true,
-          _ => false
+          token::IDENT(id, false) if id == token::special_idents::self_
+            => true,
+          _ => false
         }
     }
 
@@ -2522,11 +2519,13 @@ struct parser {
         }
 
        // This is a new-style impl declaration.
-       let ident = @~"__extensions__"; // XXX: clownshoes
+       // XXX: clownshoes
+       let ident = token::special_idents::clownshoes_extensions;
 
        // Parse the type.
        let ty = self.parse_ty(false);
 
+       // Parse traits, if necessary.
        let traits = if self.token == token::COLON {
            self.bump();
@@ -2595,7 +2594,8 @@ struct parser {
        match the_ctor {
          some((_, _, _, s_first)) => {
            self.span_note(s, #fmt("Duplicate constructor \
-                                   declaration for class %s", *class_name));
+                                   declaration for class %s",
+                                   *self.interner.get(class_name)));
            self.span_fatal(copy s_first, ~"First constructor \
                                            declared here");
          }
@@ -2608,7 +2608,8 @@ struct parser {
        match the_dtor {
          some((_, _, s_first)) => {
            self.span_note(s, #fmt("Duplicate destructor \
-                                   declaration for class %s", *class_name));
+                                   declaration for class %s",
+                                   *self.interner.get(class_name)));
            self.span_fatal(copy s_first, ~"First destructor \
                                            declared here");
          }
@@ -3081,7 +3082,7 @@ struct parser {
         let ty_params = self.parse_ty_params();
         // Newtype syntax
         if self.token == token::EQ {
-            self.check_restricted_keywords_(*id);
+            self.check_restricted_keywords_(*self.id_to_str(id));
             self.bump();
             let ty = self.parse_ty(false);
             self.expect(token::SEMI);
@@ -3297,7 +3298,7 @@ struct parser {
         let lo = self.span.lo;
         let first_ident = self.parse_ident();
         let mut path = ~[first_ident];
-        debug!{"parsed view_path: %s", *first_ident};
+        debug!{"parsed view_path: %s", *self.id_to_str(first_ident)};
         match self.token {
           token::EQ => {
             // x = foo::bar
@@ -3323,7 +3324,7 @@ struct parser {
 
               token::IDENT(i, _) => {
                 self.bump();
-                vec::push(path, self.get_str(i));
+                vec::push(path, i);
               }
 
               // foo::bar::{a,b,c}
@@ -3458,8 +3459,8 @@ struct parser {
 
     fn parse_str() -> @~str {
         match copy self.token {
-          token::LIT_STR(s) => { self.bump(); self.get_str(s) }
-          _ => self.fatal(~"expected string literal")
+          token::LIT_STR(s) => { self.bump(); self.id_to_str(s) }
+          _ =>  self.fatal(~"expected string literal")
         }
     }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 10fac8d0e23..8eb9270efe2 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -277,23 +277,83 @@ pure fn is_bar(t: token) -> bool {
     match t { BINOP(OR) | OROR => true, _ => false }
 }
 
-type ident_interner = util::interner::interner<@~str>;
 
 mod special_idents {
-    const underscore : uint = 0u;
-    const anon : uint = 1u;
-    const destr : uint = 2u; // 'drop', but that's reserved
+    import ast::ident;
+    const underscore : ident = 0u;
+    const anon : ident = 1u;
+    const dtor : ident = 2u; // 'drop', but that's reserved
+    const invalid : ident = 3u; // ''
+    const unary : ident = 4u;
+    const not_fn : ident = 5u;
+    const idx_fn : ident = 6u;
+    const unary_minus_fn : ident = 7u;
+    const clownshoes_extensions : ident = 8u;
+
+    const self_ : ident = 9u; // 'self'
+
+    /* for matcher NTs */
+    const item : ident = 10u;
+    const block : ident = 11u;
+    const stmt : ident = 12u;
+    const pat : ident = 13u;
+    const expr : ident = 14u;
+    const ty : ident = 15u;
+    const ident : ident = 16u;
+    const path : ident = 17u;
+    const tt : ident = 18u;
+    const matchers : ident = 19u;
+
+    const str : ident = 20u; // for the type
+
+    /* outside of libsyntax */
+    const ty_visitor : ident = 21u;
+    const arg : ident = 22u;
+    const descrim : ident = 23u;
+    const clownshoe_abi : ident = 24u;
+    const clownshoe_stack_shim : ident = 25u;
+    const tydesc : ident = 26u;
+    const literally_dtor : ident = 27u;
+    const main : ident = 28u;
+    const opaque : ident = 29u;
+    const blk : ident = 30u;
+    const static : ident = 31u;
+    const intrinsic : ident = 32u;
+
 }
 
+type ident_interner = util::interner::interner<@~str>;
+
+/** Key for thread-local data for sneaking interner information to the
+ * serializer/deserializer. It sounds like a hack because it is one. */
+fn interner_key(+_x: @@ident_interner) { }
+
 fn mk_ident_interner() -> ident_interner {
     /* the indices here must correspond to the numbers in special_idents */
-    let init_vec = ~[@~"_", @~"anon", @~"drop"];
-
-    let rv = @interner::mk_prefill::<@~str>(|x| str::hash(*x),
-                                            |x,y| str::eq(*x, *y), init_vec);
+    let init_vec = ~[@~"_", @~"anon", @~"drop", @~"", @~"unary", @~"!",
+                     @~"[]", @~"unary-", @~"__extensions__", @~"self",
+                     @~"item", @~"block", @~"stmt", @~"pat", @~"expr",
+                     @~"ty", @~"ident", @~"path", @~"tt", @~"matchers",
+                     @~"str", @~"ty_visitor", @~"arg", @~"descrim",
+                     @~"__rust_abi", @~"__rust_stack_shim", @~"tydesc",
+                     @~"dtor", @~"main", @~"<opaque>", @~"blk", @~"static",
+                     @~"intrinsic"];
+
+    let rv = interner::mk_prefill::<@~str>(|x| str::hash(*x),
+                                           |x,y| str::eq(*x, *y), init_vec);
+
+    /* having multiple interners will just confuse the serializer */
+    unsafe{ assert task::local_data_get(interner_key) == none };
+    unsafe{ task::local_data_set(interner_key, @rv) };
     rv
 }
 
+/* for when we don't care about the contents; doesn't interact with TLD or
+   serialization */
+fn mk_fake_ident_interner() -> ident_interner {
+    interner::mk::<@~str>(|x| str::hash(*x), |x,y| str::eq(*x, *y))
+}
+
 /**
  * All the valid words that have meaning in the Rust language.
 *
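
As the comment in `mk_ident_interner` notes, the constants in `special_idents` are only meaningful because `init_vec` lists the corresponding strings in exactly that order; the freshly built interner is then stashed in task-local data under `interner_key` so the serializer can find it, while `mk_fake_ident_interner` builds an unregistered interner for tools (such as the comment gatherer in comments.rs) that never serialize. A small sketch of that ordering invariant follows, in modern Rust with illustrative constant names; it mirrors the start of the patch's `init_vec` but is not the patch's code.

```rust
// Stand-ins for a few special_idents constants (illustrative names).
const UNDERSCORE: usize = 0;
const DTOR: usize = 2; // 'drop', but that's reserved
const SELF_: usize = 9;

fn main() {
    // Mirrors the first ten entries of init_vec in the patch. Prefilling an
    // interner with this slice hands out indices 0, 1, 2, ... in order, so
    // constant N must name the string at position N or every "well-known"
    // identifier silently means the wrong thing.
    let init_vec = ["_", "anon", "drop", "", "unary", "!", "[]", "unary-",
                    "__extensions__", "self"];
    assert_eq!(init_vec[UNDERSCORE], "_");
    assert_eq!(init_vec[DTOR], "drop");
    assert_eq!(init_vec[SELF_], "self");
}
```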
