about summary refs log tree commit diff
path: root/src/libsyntax/parse
diff options
context:
space:
mode:
author: Paul Stansifer <paul.stansifer@gmail.com> 2012-07-18 16:18:02 -0700
committer: Paul Stansifer <paul.stansifer@gmail.com> 2012-08-22 14:59:25 -0700
commit: 1153b5dcc86c3567b0a86e441938f05d4f2e295b (patch)
tree: fdcbcea39abecb4ad1ea5145e62e8c013b05e930 /src/libsyntax/parse
parent: 7317bf8792ebb3f27768109b7d574ee0806cc5e5 (diff)
download: rust-1153b5dcc86c3567b0a86e441938f05d4f2e295b.tar.gz
download: rust-1153b5dcc86c3567b0a86e441938f05d4f2e295b.zip
intern identifiers
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r-- src/libsyntax/parse/attr.rs 12
-rw-r--r-- src/libsyntax/parse/comments.rs 2
-rw-r--r-- src/libsyntax/parse/common.rs 10
-rw-r--r-- src/libsyntax/parse/eval.rs 23
-rw-r--r-- src/libsyntax/parse/parser.rs 59
-rw-r--r-- src/libsyntax/parse/token.rs 76
6 files changed, 123 insertions, 59 deletions
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index cbb6709d9c8..54b0e3388f7 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -66,7 +66,7 @@ impl parser: parser_attr {
               }
               token::DOC_COMMENT(s) => {
                 let attr = ::attr::mk_sugared_doc_attr(
-                        *self.get_str(s), self.span.lo, self.span.hi);
+                        *self.id_to_str(s), self.span.lo, self.span.hi);
                 if attr.node.style != ast::attr_outer {
                   self.fatal(~"expected outer comment");
                 }
@@ -128,7 +128,7 @@ impl parser: parser_attr {
               }
               token::DOC_COMMENT(s) => {
                 let attr = ::attr::mk_sugared_doc_attr(
-                        *self.get_str(s), self.span.lo, self.span.hi);
+                        *self.id_to_str(s), self.span.lo, self.span.hi);
                 self.bump();
                 if attr.node.style == ast::attr_inner {
                   inner_attrs += ~[attr];
@@ -145,22 +145,22 @@ impl parser: parser_attr {
 
     fn parse_meta_item() -> @ast::meta_item {
         let lo = self.span.lo;
-        let ident = self.parse_ident();
+        let name = *self.id_to_str(self.parse_ident());
         match self.token {
           token::EQ => {
             self.bump();
             let lit = self.parse_lit();
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_name_value(ident, lit));
+            return @spanned(lo, hi, ast::meta_name_value(name, lit));
           }
           token::LPAREN => {
             let inner_items = self.parse_meta_seq();
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_list(ident, inner_items));
+            return @spanned(lo, hi, ast::meta_list(name, inner_items));
           }
           _ => {
             let mut hi = self.span.hi;
-            return @spanned(lo, hi, ast::meta_word(ident));
+            return @spanned(lo, hi, ast::meta_word(name));
           }
         }
     }
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index b10da4482e4..09edbda4335 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -276,7 +276,7 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                 srdr: io::Reader) ->
    {cmnts: ~[cmnt], lits: ~[lit]} {
     let src = @str::from_bytes(srdr.read_whole_stream());
-    let itr = parse::token::mk_ident_interner();
+    let itr = parse::token::mk_fake_ident_interner();
     let rdr = lexer::new_low_level_string_reader
         (span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr);
 
diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs
index 59dad16dc44..1d260268d3f 100644
--- a/src/libsyntax/parse/common.rs
+++ b/src/libsyntax/parse/common.rs
@@ -85,7 +85,7 @@ impl parser: parser_common {
 
     fn parse_ident() -> ast::ident {
         match copy self.token {
-          token::IDENT(i, _) => { self.bump(); return self.get_str(i); }
+          token::IDENT(i, _) => { self.bump(); return i; }
           token::INTERPOLATED(token::nt_ident(*)) => { self.bug(
               ~"ident interpolation not converted to real token"); }
           _ => { self.fatal(~"expected ident, found `"
@@ -110,6 +110,8 @@ impl parser: parser_common {
         return if self.token == tok { self.bump(); true } else { false };
     }
 
+    // Storing keywords as interned idents instead of strings would be nifty.
+
     // A sanity check that the word we are asking for is a known keyword
     fn require_keyword(word: ~str) {
         if !self.keywords.contains_key_ref(&word) {
@@ -119,7 +121,7 @@ impl parser: parser_common {
 
     fn token_is_word(word: ~str, ++tok: token::token) -> bool {
         match tok {
-          token::IDENT(sid, false) => { word == *self.get_str(sid) }
+          token::IDENT(sid, false) => { *self.id_to_str(sid) == word }
           _ => { false }
         }
     }
@@ -136,7 +138,7 @@ impl parser: parser_common {
     fn is_any_keyword(tok: token::token) -> bool {
         match tok {
           token::IDENT(sid, false) => {
-            self.keywords.contains_key_ref(self.get_str(sid))
+            self.keywords.contains_key_ref(self.id_to_str(sid))
           }
           _ => false
         }
@@ -148,7 +150,7 @@ impl parser: parser_common {
         let mut bump = false;
         let val = match self.token {
           token::IDENT(sid, false) => {
-            if word == *self.get_str(sid) {
+            if word == *self.id_to_str(sid) {
                 bump = true;
                 true
             } else { false }
diff --git a/src/libsyntax/parse/eval.rs b/src/libsyntax/parse/eval.rs
index 6b0112922a5..60d2fadb04c 100644
--- a/src/libsyntax/parse/eval.rs
+++ b/src/libsyntax/parse/eval.rs
@@ -78,10 +78,10 @@ fn parse_companion_mod(cx: ctx, prefix: ~str, suffix: option<~str>)
     }
 }
 
-fn cdir_path_opt(id: ast::ident, attrs: ~[ast::attribute]) -> @~str {
+fn cdir_path_opt(default: ~str, attrs: ~[ast::attribute]) -> ~str {
     match ::attr::first_attr_value_str_by_name(attrs, ~"path") {
-      some(d) => return d,
-      none => return id
+      some(d) => d,
+      none => default
     }
 }
 
@@ -90,11 +90,12 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: ~str,
                         &items: ~[@ast::item]) {
     match cdir.node {
       ast::cdir_src_mod(id, attrs) => {
-        let file_path = cdir_path_opt(@(*id + ~".rs"), attrs);
+        let file_path = cdir_path_opt((cx.sess.interner.get(id) + ~".rs"),
+                                      attrs);
         let full_path =
-            if path::path_is_absolute(*file_path) {
-                *file_path
-            } else { prefix + path::path_sep() + *file_path };
+            if path::path_is_absolute(file_path) {
+                file_path
+            } else { prefix + path::path_sep() + file_path };
         let (p0, r0) =
             new_parser_etc_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE);
         let inner_attrs = p0.parse_inner_attrs_and_next();
@@ -111,11 +112,11 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: ~str,
         vec::push(items, i);
       }
       ast::cdir_dir_mod(id, cdirs, attrs) => {
-        let path = cdir_path_opt(id, attrs);
+        let path = cdir_path_opt(*cx.sess.interner.get(id), attrs);
         let full_path =
-            if path::path_is_absolute(*path) {
-                *path
-            } else { prefix + path::path_sep() + *path };
+            if path::path_is_absolute(path) {
+                path
+            } else { prefix + path::path_sep() + path };
         let (m0, a0) = eval_crate_directives_to_mod(
             cx, cdirs, full_path, none);
         let i =
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 648ec3e60db..fea79309c21 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -6,7 +6,7 @@ import std::map::{hashmap, str_hash};
 import token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident,
                INTERPOLATED};
 import codemap::{span,fss_none};
-import util::interner;
+import util::interner::interner;
 import ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
 import lexer::reader;
 import prec::{as_prec, token_to_binop};
@@ -193,12 +193,14 @@ struct parser {
     let mut restriction: restriction;
     let mut quote_depth: uint; // not (yet) related to the quasiquoter
     let reader: reader;
+    let interner: interner<@~str>;
     let keywords: hashmap<~str, ()>;
     let restricted_keywords: hashmap<~str, ()>;
 
     new(sess: parse_sess, cfg: ast::crate_cfg, +rdr: reader, ftype: file_type)
     {
         self.reader <- rdr;
+        self.interner = self.reader.interner();
         let tok0 = self.reader.next_token();
         let span0 = tok0.sp;
         self.sess = sess;
@@ -268,11 +270,10 @@ struct parser {
     fn warn(m: ~str) {
         self.sess.span_diagnostic.span_warn(copy self.span, m)
     }
-    pure fn get_str(i: token::str_num) -> @~str {
-        self.reader.interner().get(i)
-    }
     fn get_id() -> node_id { next_node_id(self.sess) }
 
+    pure fn id_to_str(id: ident) -> @~str { self.sess.interner.get(id) }
+
     fn parse_ty_fn(purity: ast::purity) -> ty_ {
         let proto, bounds;
         if self.eat_keyword(~"extern") {
@@ -398,9 +399,9 @@ struct parser {
         }
     }
 
-    fn region_from_name(s: option<@~str>) -> @region {
+    fn region_from_name(s: option<ident>) -> @region {
         let r = match s {
-          some (string) => re_named(string),
+          some (id) => re_named(id),
           none => re_anon
         };
 
@@ -414,8 +415,7 @@ struct parser {
         match copy self.token {
           token::IDENT(sid, _) => {
             self.bump();
-            let n = self.get_str(sid);
-            self.region_from_name(some(n))
+            self.region_from_name(some(sid))
           }
           _ => {
             self.region_from_name(none)
@@ -430,7 +430,7 @@ struct parser {
               token::IDENT(sid, _) => {
                 if self.look_ahead(1u) == token::BINOP(token::SLASH) {
                     self.bump(); self.bump();
-                    some(self.get_str(sid))
+                    some(sid)
                 } else {
                     none
                 }
@@ -583,7 +583,7 @@ struct parser {
                 let name = self.parse_value_ident();
                 self.bump();
                 name
-            } else { @~"" }
+            } else { token::special_idents::invalid }
         };
 
         let t = self.parse_ty(false);
@@ -678,10 +678,10 @@ struct parser {
           token::LIT_INT(i, it) => lit_int(i, it),
           token::LIT_UINT(u, ut) => lit_uint(u, ut),
           token::LIT_INT_UNSUFFIXED(i) => lit_int_unsuffixed(i),
-          token::LIT_FLOAT(s, ft) => lit_float(self.get_str(s), ft),
-          token::LIT_STR(s) => lit_str(self.get_str(s)),
-          token::LPAREN => { self.expect(token::RPAREN); lit_nil }
-          _ => self.unexpected_last(tok)
+          token::LIT_FLOAT(s, ft) => lit_float(self.id_to_str(s), ft),
+          token::LIT_STR(s) => lit_str(self.id_to_str(s)),
+          token::LPAREN => { self.expect(token::RPAREN); lit_nil },
+          _ => { self.unexpected_last(tok); }
         }
     }
 
@@ -1140,8 +1140,7 @@ struct parser {
                         self.parse_seq_to_gt(some(token::COMMA),
                                              |p| p.parse_ty(false))
                     } else { ~[] };
-                    e = self.mk_pexpr(lo, hi, expr_field(self.to_expr(e),
-                                                         self.get_str(i),
+                    e = self.mk_pexpr(lo, hi, expr_field(self.to_expr(e), i,
                                                          tys));
                   }
                   _ => self.unexpected()
@@ -2123,9 +2122,6 @@ struct parser {
     }
 
     fn expr_is_complete(e: pexpr) -> bool {
-        log(debug, (~"expr_is_complete", self.restriction,
-                    print::pprust::expr_to_str(*e),
-                    classify::expr_requires_semi_to_be_stmt(*e)));
         return self.restriction == RESTRICT_STMT_EXPR &&
             !classify::expr_requires_semi_to_be_stmt(*e);
     }
@@ -2306,8 +2302,9 @@ struct parser {
 
     fn is_self_ident() -> bool {
         match self.token {
-            token::IDENT(sid, false) if ~"self" == *self.get_str(sid) => true,
-            _ => false
+          token::IDENT(id, false) if id == token::special_idents::self_
+            => true,
+          _ => false
         }
     }
 
@@ -2522,11 +2519,13 @@ struct parser {
         }
 
         // This is a new-style impl declaration.
-        let ident = @~"__extensions__";     // XXX: clownshoes
+        // XXX: clownshoes
+        let ident = token::special_idents::clownshoes_extensions;
 
         // Parse the type.
         let ty = self.parse_ty(false);
 
+
         // Parse traits, if necessary.
         let traits = if self.token == token::COLON {
             self.bump();
@@ -2595,7 +2594,8 @@ struct parser {
                       match the_ctor {
                         some((_, _, _, s_first)) => {
                           self.span_note(s, #fmt("Duplicate constructor \
-                                     declaration for class %s", *class_name));
+                                     declaration for class %s",
+                                     *self.interner.get(class_name)));
                            self.span_fatal(copy s_first, ~"First constructor \
                                                           declared here");
                         }
@@ -2608,7 +2608,8 @@ struct parser {
                       match the_dtor {
                         some((_, _, s_first)) => {
                           self.span_note(s, #fmt("Duplicate destructor \
-                                     declaration for class %s", *class_name));
+                                     declaration for class %s",
+                                     *self.interner.get(class_name)));
                           self.span_fatal(copy s_first, ~"First destructor \
                                                           declared here");
                         }
@@ -3081,7 +3082,7 @@ struct parser {
         let ty_params = self.parse_ty_params();
         // Newtype syntax
         if self.token == token::EQ {
-            self.check_restricted_keywords_(*id);
+            self.check_restricted_keywords_(*self.id_to_str(id));
             self.bump();
             let ty = self.parse_ty(false);
             self.expect(token::SEMI);
@@ -3297,7 +3298,7 @@ struct parser {
         let lo = self.span.lo;
         let first_ident = self.parse_ident();
         let mut path = ~[first_ident];
-        debug!{"parsed view_path: %s", *first_ident};
+        debug!{"parsed view_path: %s", *self.id_to_str(first_ident)};
         match self.token {
           token::EQ => {
             // x = foo::bar
@@ -3323,7 +3324,7 @@ struct parser {
 
                   token::IDENT(i, _) => {
                     self.bump();
-                    vec::push(path, self.get_str(i));
+                    vec::push(path, i);
                   }
 
                   // foo::bar::{a,b,c}
@@ -3458,8 +3459,8 @@ struct parser {
 
     fn parse_str() -> @~str {
         match copy self.token {
-          token::LIT_STR(s) => { self.bump(); self.get_str(s) }
-          _ => self.fatal(~"expected string literal")
+          token::LIT_STR(s) => { self.bump(); self.id_to_str(s) }
+          _ =>  self.fatal(~"expected string literal")
         }
     }
 
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 10fac8d0e23..8eb9270efe2 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -277,23 +277,83 @@ pure fn is_bar(t: token) -> bool {
     match t { BINOP(OR) | OROR => true, _ => false }
 }
 
-type ident_interner = util::interner::interner<@~str>;
 
 mod special_idents {
-    const underscore : uint = 0u;
-    const anon : uint = 1u;
-    const destr : uint = 2u; // 'drop', but that's reserved
+    import ast::ident;
+    const underscore : ident = 0u;
+    const anon : ident = 1u;
+    const dtor : ident = 2u; // 'drop', but that's reserved
+    const invalid : ident = 3u; // ''
+    const unary : ident = 4u;
+    const not_fn : ident = 5u;
+    const idx_fn : ident = 6u;
+    const unary_minus_fn : ident = 7u;
+    const clownshoes_extensions : ident = 8u;
+
+    const self_ : ident = 9u; // 'self'
+
+    /* for matcher NTs */
+    const item : ident = 10u;
+    const block : ident = 11u;
+    const stmt : ident = 12u;
+    const pat : ident = 13u;
+    const expr : ident = 14u;
+    const ty : ident = 15u;
+    const ident : ident = 16u;
+    const path : ident = 17u;
+    const tt : ident = 18u;
+    const matchers : ident = 19u;
+
+    const str : ident = 20u; // for the type
+
+    /* outside of libsyntax */
+    const ty_visitor : ident = 21u;
+    const arg : ident = 22u;
+    const descrim : ident = 23u;
+    const clownshoe_abi : ident = 24u;
+    const clownshoe_stack_shim : ident = 25u;
+    const tydesc : ident = 26u;
+    const literally_dtor : ident = 27u;
+    const main : ident = 28u;
+    const opaque : ident = 29u;
+    const blk : ident = 30u;
+    const static : ident = 31u;
+    const intrinsic : ident = 32u;
+
 }
 
+type ident_interner = util::interner::interner<@~str>;
+
+/** Key for thread-local data for sneaking interner information to the
+ * serializer/deserializer. It sounds like a hack because it is one. */
+fn interner_key(+_x: @@ident_interner) { }
+
 fn mk_ident_interner() -> ident_interner {
     /* the indices here must correspond to the numbers in special_idents */
-    let init_vec = ~[@~"_", @~"anon", @~"drop"];
-
-    let rv = @interner::mk_prefill::<@~str>(|x| str::hash(*x),
-                                            |x,y| str::eq(*x, *y), init_vec);
+    let init_vec = ~[@~"_", @~"anon", @~"drop", @~"", @~"unary", @~"!",
+                     @~"[]", @~"unary-", @~"__extensions__", @~"self",
+                     @~"item", @~"block", @~"stmt", @~"pat", @~"expr",
+                     @~"ty", @~"ident", @~"path", @~"tt", @~"matchers",
+                     @~"str", @~"ty_visitor", @~"arg", @~"descrim",
+                     @~"__rust_abi", @~"__rust_stack_shim", @~"tydesc",
+                     @~"dtor", @~"main", @~"<opaque>", @~"blk", @~"static",
+                     @~"intrinsic"];
+
+    let rv = interner::mk_prefill::<@~str>(|x| str::hash(*x),
+                                           |x,y| str::eq(*x, *y), init_vec);
+
+    /* having multiple interners will just confuse the serializer */
+    unsafe{ assert task::local_data_get(interner_key) == none };
+    unsafe{ task::local_data_set(interner_key, @rv) };
     rv
 }
 
+/* for when we don't care about the contents; doesn't interact with TLD or
+   serialization */
+fn mk_fake_ident_interner() -> ident_interner {
+    interner::mk::<@~str>(|x| str::hash(*x), |x,y| str::eq(*x, *y))
+}
+
 /**
  * All the valid words that have meaning in the Rust language.
  *