about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
authorJohn Clements <clements@racket-lang.org>2013-06-26 10:11:19 -0700
committerJohn Clements <clements@racket-lang.org>2013-09-06 13:35:10 -0700
commit58e7598c2e1265a0f1292ed6f93bfb29abb93504 (patch)
tree926acafd1d74b6d1d359fe66840d64f0f5fa2bb3 /src/libsyntax
parent9d33001a90319fc242dcf43ec3c7e1fa1c11d847 (diff)
downloadrust-58e7598c2e1265a0f1292ed6f93bfb29abb93504.tar.gz
rust-58e7598c2e1265a0f1292ed6f93bfb29abb93504.zip
added gensym_copy mechanism to ensure sharing of pointers in the interner
this makes comparisons constant-time, and enables spelling-comparison
of identifiers, crucial in many parts of resolve.
Diffstat (limited to 'src/libsyntax')
-rw-r--r--src/libsyntax/parse/token.rs73
-rw-r--r--src/libsyntax/util/interner.rs63
2 files changed, 88 insertions, 48 deletions
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 29c460c5c3d..17928338f37 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -15,12 +15,12 @@ use parse::token;
 use util::interner::StrInterner;
 use util::interner;
 
+use std::cast;
 use std::char;
 use std::cmp::Equiv;
 use std::local_data;
 use std::rand;
 use std::rand::RngUtil;
-use std::ptr::to_unsafe_ptr;
 
 #[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
 pub enum binop {
@@ -382,30 +382,8 @@ pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
   }
 }
 
-pub struct ident_interner {
-    priv interner: StrInterner,
-}
-
-impl ident_interner {
-    pub fn intern(&self, val: &str) -> Name {
-        self.interner.intern(val)
-    }
-    pub fn gensym(&self, val: &str) -> Name {
-        self.interner.gensym(val)
-    }
-    pub fn get(&self, idx: Name) -> @str {
-        self.interner.get(idx)
-    }
-    // is this really something that should be exposed?
-    pub fn len(&self) -> uint {
-        self.interner.len()
-    }
-    pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
-                                                     -> Option<Name> {
-        self.interner.find_equiv(val)
-    }
-}
-
+// looks like we can get rid of this completely...
+pub type ident_interner = StrInterner;
 
 // return a fresh interner, preloaded with special identifiers.
 fn mk_fresh_ident_interner() -> @ident_interner {
@@ -486,9 +464,7 @@ fn mk_fresh_ident_interner() -> @ident_interner {
         "typeof",             // 67
     ];
 
-    @ident_interner {
-        interner: interner::StrInterner::prefill(init_vec)
-    }
+    @interner::StrInterner::prefill(init_vec)
 }
 
 // if an interner exists in TLS, return it. Otherwise, prepare a
@@ -509,7 +485,7 @@ pub fn get_ident_interner() -> @ident_interner {
 /* for when we don't care about the contents; doesn't interact with TLD or
    serialization */
 pub fn mk_fake_ident_interner() -> @ident_interner {
-    @ident_interner { interner: interner::StrInterner::new() }
+    @interner::StrInterner::new()
 }
 
 // maps a string to its interned representation
@@ -545,10 +521,11 @@ pub fn gensym_ident(str : &str) -> ast::Ident {
 }
 
 // create a fresh name that maps to the same string as the old one.
-// note that this guarantees that ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
+// note that this guarantees that str_ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
 // that is, that the new name and the old one are connected to ptr_eq strings.
 pub fn fresh_name(src : &ast::Ident) -> Name {
-    gensym(ident_to_str(src))
+    let interner = get_ident_interner();
+    interner.gensym_copy(src.name)
     // following: debug version. Could work in final except that it's incompatible with
     // good error messages and uses of struct names in ambiguous could-be-binding
     // locations. Also definitely destroys the guarantee given above about ptr_eq.
@@ -557,18 +534,26 @@ pub fn fresh_name(src : &ast::Ident) -> Name {
 }
 
 // it looks like there oughta be a str_ptr_eq fn, but no one bothered to implement it?
-pub fn str_ptr_eq<T>(a: @str, b: @str) -> bool {
-    // doesn't compile! ...because of rebase mangling. this should be fixed
-    // in the commit that follows this.
-    let (a_ptr, b_ptr): (*uint, *uint) = (to_unsafe_ptr(a), to_unsafe_ptr(b));
-    a_ptr == b_ptr
-}
-
 
+// determine whether two @str values are pointer-equal
+pub fn str_ptr_eq(a : @str, b : @str) -> bool {
+    unsafe {
+        let p : uint = cast::transmute(a);
+        let q : uint = cast::transmute(b);
+        let result = p == q;
+        // got to transmute them back, to make sure the ref count is correct:
+        let junk1 : @str = cast::transmute(p);
+        let junk2 : @str = cast::transmute(q);
+        result
+    }
+}
 
 // return true when two identifiers refer (through the intern table) to the same ptr_eq
 // string. This is used to compare identifiers in places where hygienic comparison is
 // not wanted (i.e. not lexical vars).
+pub fn ident_spelling_eq(a : &ast::Ident, b : &ast::Ident) -> bool {
+    str_ptr_eq(interner_get(a.name),interner_get(b.name))
+}
 
 // create a fresh mark.
 pub fn fresh_mark() -> Mrk {
@@ -721,13 +706,21 @@ mod test {
     use ast_util;
 
 
-    #[test] fn t1() {
+    #[test] fn str_ptr_eq_tests(){
+        let a = @"abc";
+        let b = @"abc";
+        let c = a;
+        assert!(str_ptr_eq(a,c));
+        assert!(!str_ptr_eq(a,b));
+    }
+
+    #[test] fn fresh_name_pointer_sharing() {
         let ghi = str_to_ident("ghi");
         assert_eq!(ident_to_str(&ghi),@"ghi");
+        assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&ghi)))
         let fresh = ast::Ident::new(fresh_name(&ghi));
         assert_eq!(ident_to_str(&fresh),@"ghi");
         assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&fresh)));
-        assert_eq!(3,4);
     }
 
 }
diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs
index 46676ce1093..2b1e7eaa9b2 100644
--- a/src/libsyntax/util/interner.rs
+++ b/src/libsyntax/util/interner.rs
@@ -117,6 +117,23 @@ impl StrInterner {
         new_idx
     }
 
+    // I want these gensyms to share name pointers
+    // with existing entries. This would be automatic,
+    // except that the existing gensym creates its
+    // own managed ptr using to_managed. I think that
+    // adding this utility function is the most
+    // lightweight way to get what I want, though not
+    // necessarily the cleanest.
+
+    // create a gensym with the same name as an existing
+    // entry.
+    pub fn gensym_copy(&self, idx : uint) -> uint {
+        let new_idx = self.len();
+        // leave out of map to avoid colliding
+        self.vect.push(self.vect[idx]);
+        new_idx
+    }
+
     // this isn't "pure" in the traditional sense, because it can go from
     // failing to returning a value as items are interned. But for typestate,
     // where we first check a pred and then rely on it, ceasing to fail is ok.
@@ -144,23 +161,23 @@ mod tests {
     }
 
     #[test]
-    fn i2 () {
+    fn interner_tests () {
         let i : Interner<@str> = Interner::new();
         // first one is zero:
-        assert_eq!(i.intern (@"dog"), 0);
+        assert_eq!(i.intern(@"dog"), 0);
         // re-use gets the same entry:
-        assert_eq!(i.intern (@"dog"), 0);
+        assert_eq!(i.intern(@"dog"), 0);
         // different string gets a different #:
-        assert_eq!(i.intern (@"cat"), 1);
-        assert_eq!(i.intern (@"cat"), 1);
+        assert_eq!(i.intern(@"cat"), 1);
+        assert_eq!(i.intern(@"cat"), 1);
         // dog is still at zero
-        assert_eq!(i.intern (@"dog"), 0);
+        assert_eq!(i.intern(@"dog"), 0);
         // gensym gets 3
-        assert_eq!(i.gensym (@"zebra" ), 2);
+        assert_eq!(i.gensym(@"zebra" ), 2);
         // gensym of same string gets new number :
         assert_eq!(i.gensym (@"zebra" ), 3);
         // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym (@"dog"), 4);
+        assert_eq!(i.gensym(@"dog"), 4);
         assert_eq!(i.get(0), @"dog");
         assert_eq!(i.get(1), @"cat");
         assert_eq!(i.get(2), @"zebra");
@@ -176,4 +193,34 @@ mod tests {
         assert_eq!(i.get(2), @"Carol");
         assert_eq!(i.intern(@"Bob"), 1);
     }
+
+    #[test]
+    fn string_interner_tests() {
+        let i : StrInterner = StrInterner::new();
+        // first one is zero:
+        assert_eq!(i.intern("dog"), 0);
+        // re-use gets the same entry:
+        assert_eq!(i.intern ("dog"), 0);
+        // different string gets a different #:
+        assert_eq!(i.intern("cat"), 1);
+        assert_eq!(i.intern("cat"), 1);
+        // dog is still at zero
+        assert_eq!(i.intern("dog"), 0);
+        // gensym gets 3
+        assert_eq!(i.gensym("zebra"), 2);
+        // gensym of same string gets new number :
+        assert_eq!(i.gensym("zebra"), 3);
+        // gensym of *existing* string gets new number:
+        assert_eq!(i.gensym("dog"), 4);
+        // gensym tests again with gensym_copy:
+        assert_eq!(i.gensym_copy(2), 5);
+        assert_eq!(i.get(5), @"zebra");
+        assert_eq!(i.gensym_copy(2), 6);
+        assert_eq!(i.get(6), @"zebra");
+        assert_eq!(i.get(0), @"dog");
+        assert_eq!(i.get(1), @"cat");
+        assert_eq!(i.get(2), @"zebra");
+        assert_eq!(i.get(3), @"zebra");
+        assert_eq!(i.get(4), @"dog");
+    }
 }