about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2016-07-13 10:26:18 -0700
committer: GitHub <noreply@github.com> 2016-07-13 10:26:18 -0700
commit: 0b7fb80e1c05bee176ea68d21e19a352a106c968 (patch)
tree: 0db4006f136b461d7cbdc2cd8de302e69dc1cc17 /src/libsyntax
parent: 4a12a70a5c516d4aa5e86de52a62f41b67ab8bc0 (diff)
parent: 060b5c5ef273a6b74ccbd10c1d4a1debfa27d9de (diff)
download: rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.tar.gz
          rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.zip
Auto merge of #34772 - jseyfried:cleanup_interner, r=eddyb
Start cleaning up the string interner

r? @eddyb
Diffstat (limited to 'src/libsyntax')
-rw-r--r-- src/libsyntax/parse/parser.rs 2
-rw-r--r-- src/libsyntax/parse/token.rs 39
-rw-r--r-- src/libsyntax/util/interner.rs 283
3 files changed, 69 insertions, 255 deletions
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index c6374e59c1b..4656ba03e21 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -259,7 +259,6 @@ pub struct Parser<'a> {
     pub restrictions: Restrictions,
     pub quote_depth: usize, // not (yet) related to the quasiquoter
     pub reader: Box<Reader+'a>,
-    pub interner: Rc<token::IdentInterner>,
     /// The set of seen errors about obsolete syntax. Used to suppress
     /// extra detail when the same error is seen twice
     pub obsolete_set: HashSet<ObsoleteSyntax>,
@@ -356,7 +355,6 @@ impl<'a> Parser<'a> {
 
         Parser {
             reader: rdr,
-            interner: token::get_ident_interner(),
             sess: sess,
             cfg: cfg,
             token: tok0.tok,
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 8376d28164d..ab7ed223bb3 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -17,11 +17,11 @@ pub use self::Token::*;
 use ast::{self, BinOpKind};
 use ext::mtwt;
 use ptr::P;
-use util::interner::{RcStr, StrInterner};
-use util::interner;
+use util::interner::Interner;
 use tokenstream;
 
 use serialize::{Decodable, Decoder, Encodable, Encoder};
+use std::cell::RefCell;
 use std::fmt;
 use std::ops::Deref;
 use std::rc::Rc;
@@ -397,7 +397,7 @@ macro_rules! declare_keywords {(
     }
 
     fn mk_fresh_ident_interner() -> IdentInterner {
-        interner::StrInterner::prefill(&[$($string,)*])
+        Interner::prefill(&[$($string,)*])
     }
 }}
 
@@ -473,22 +473,25 @@ declare_keywords! {
 }
 
 // looks like we can get rid of this completely...
-pub type IdentInterner = StrInterner;
+pub type IdentInterner = Interner;
 
 // if an interner exists in TLS, return it. Otherwise, prepare a
 // fresh one.
 // FIXME(eddyb) #8726 This should probably use a thread-local reference.
-pub fn get_ident_interner() -> Rc<IdentInterner> {
-    thread_local!(static KEY: Rc<::parse::token::IdentInterner> = {
-        Rc::new(mk_fresh_ident_interner())
+pub fn with_ident_interner<T, F: FnOnce(&mut IdentInterner) -> T>(f: F) -> T {
+    thread_local!(static KEY: RefCell<IdentInterner> = {
+        RefCell::new(mk_fresh_ident_interner())
     });
-    KEY.with(|k| k.clone())
+    KEY.with(|interner| f(&mut *interner.borrow_mut()))
 }
 
 /// Reset the ident interner to its initial state.
 pub fn reset_ident_interner() {
-    let interner = get_ident_interner();
-    interner.reset(mk_fresh_ident_interner());
+    with_ident_interner(|interner| *interner = mk_fresh_ident_interner());
+}
+
+pub fn clear_ident_interner() {
+    with_ident_interner(|interner| *interner = IdentInterner::new());
 }
 
 /// Represents a string stored in the thread-local interner. Because the
@@ -502,19 +505,19 @@ pub fn reset_ident_interner() {
 /// somehow.
 #[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)]
 pub struct InternedString {
-    string: RcStr,
+    string: Rc<String>,
 }
 
 impl InternedString {
     #[inline]
     pub fn new(string: &'static str) -> InternedString {
         InternedString {
-            string: RcStr::new(string),
+            string: Rc::new(string.to_owned()),
         }
     }
 
     #[inline]
-    fn new_from_rc_str(string: RcStr) -> InternedString {
+    fn new_from_rc_str(string: Rc<String>) -> InternedString {
         InternedString {
             string: string,
         }
@@ -522,8 +525,7 @@ impl InternedString {
 
     #[inline]
     pub fn new_from_name(name: ast::Name) -> InternedString {
-        let interner = get_ident_interner();
-        InternedString::new_from_rc_str(interner.get(name))
+        with_ident_interner(|interner| InternedString::new_from_rc_str(interner.get(name)))
     }
 }
 
@@ -611,13 +613,13 @@ pub fn intern_and_get_ident(s: &str) -> InternedString {
 /// Maps a string to its interned representation.
 #[inline]
 pub fn intern(s: &str) -> ast::Name {
-    get_ident_interner().intern(s)
+    with_ident_interner(|interner| interner.intern(s))
 }
 
 /// gensym's a new usize, using the current interner.
 #[inline]
 pub fn gensym(s: &str) -> ast::Name {
-    get_ident_interner().gensym(s)
+    with_ident_interner(|interner| interner.gensym(s))
 }
 
 /// Maps a string to an identifier with an empty syntax context.
@@ -636,8 +638,7 @@ pub fn gensym_ident(s: &str) -> ast::Ident {
 // note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src)));
 // that is, that the new name and the old one are connected to ptr_eq strings.
 pub fn fresh_name(src: ast::Ident) -> ast::Name {
-    let interner = get_ident_interner();
-    interner.gensym_copy(src.name)
+    with_ident_interner(|interner| interner.gensym_copy(src.name))
     // following: debug version. Could work in final except that it's incompatible with
     // good error messages and uses of struct names in ambiguous could-be-binding
     // locations. Also definitely destroys the guarantee given above about ptr_eq.
diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs
index 7295b36af0f..6bb409715aa 100644
--- a/src/libsyntax/util/interner.rs
+++ b/src/libsyntax/util/interner.rs
@@ -15,212 +15,72 @@
 use ast::Name;
 
 use std::borrow::Borrow;
-use std::cell::RefCell;
-use std::cmp::Ordering;
 use std::collections::HashMap;
-use std::fmt;
-use std::hash::Hash;
-use std::ops::Deref;
 use std::rc::Rc;
 
-pub struct Interner<T> {
-    map: RefCell<HashMap<T, Name>>,
-    vect: RefCell<Vec<T> >,
-}
-
-// when traits can extend traits, we should extend index<Name,T> to get []
-impl<T: Eq + Hash + Clone + 'static> Interner<T> {
-    pub fn new() -> Interner<T> {
-        Interner {
-            map: RefCell::new(HashMap::new()),
-            vect: RefCell::new(Vec::new()),
-        }
-    }
-
-    pub fn prefill(init: &[T]) -> Interner<T> {
-        let rv = Interner::new();
-        for v in init {
-            rv.intern((*v).clone());
-        }
-        rv
-    }
-
-    pub fn intern(&self, val: T) -> Name {
-        let mut map = self.map.borrow_mut();
-        if let Some(&idx) = (*map).get(&val) {
-            return idx;
-        }
-
-        let mut vect = self.vect.borrow_mut();
-        let new_idx = Name((*vect).len() as u32);
-        (*map).insert(val.clone(), new_idx);
-        (*vect).push(val);
-        new_idx
-    }
-
-    pub fn gensym(&self, val: T) -> Name {
-        let mut vect = self.vect.borrow_mut();
-        let new_idx = Name((*vect).len() as u32);
-        // leave out of .map to avoid colliding
-        (*vect).push(val);
-        new_idx
-    }
-
-    pub fn get(&self, idx: Name) -> T {
-        let vect = self.vect.borrow();
-        (*vect)[idx.0 as usize].clone()
-    }
-
-    pub fn len(&self) -> usize {
-        let vect = self.vect.borrow();
-        (*vect).len()
-    }
-
-    pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
-    where T: Borrow<Q>, Q: Eq + Hash {
-        let map = self.map.borrow();
-        match (*map).get(val) {
-            Some(v) => Some(*v),
-            None => None,
-        }
-    }
-
-    pub fn clear(&self) {
-        *self.map.borrow_mut() = HashMap::new();
-        *self.vect.borrow_mut() = Vec::new();
-    }
-}
-
-#[derive(Clone, PartialEq, Hash, PartialOrd)]
-pub struct RcStr {
-    string: Rc<String>,
-}
-
-impl RcStr {
-    pub fn new(string: &str) -> RcStr {
-        RcStr {
-            string: Rc::new(string.to_string()),
-        }
-    }
-}
-
-impl Eq for RcStr {}
-
-impl Ord for RcStr {
-    fn cmp(&self, other: &RcStr) -> Ordering {
-        self[..].cmp(&other[..])
-    }
-}
-
-impl fmt::Debug for RcStr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self[..].fmt(f)
-    }
-}
-
-impl fmt::Display for RcStr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self[..].fmt(f)
-    }
-}
+#[derive(PartialEq, Eq, Hash)]
+struct RcStr(Rc<String>);
 
 impl Borrow<str> for RcStr {
     fn borrow(&self) -> &str {
-        &self.string[..]
+        &self.0
     }
 }
 
-impl Deref for RcStr {
-    type Target = str;
-
-    fn deref(&self) -> &str { &self.string[..] }
-}
-
-/// A StrInterner differs from Interner<String> in that it accepts
-/// &str rather than RcStr, resulting in less allocation.
-pub struct StrInterner {
-    map: RefCell<HashMap<RcStr, Name>>,
-    vect: RefCell<Vec<RcStr> >,
+#[derive(Default)]
+pub struct Interner {
+    names: HashMap<RcStr, Name>,
+    strings: Vec<Rc<String>>,
 }
 
 /// When traits can extend traits, we should extend index<Name,T> to get []
-impl StrInterner {
-    pub fn new() -> StrInterner {
-        StrInterner {
-            map: RefCell::new(HashMap::new()),
-            vect: RefCell::new(Vec::new()),
-        }
+impl Interner {
+    pub fn new() -> Self {
+        Interner::default()
     }
 
-    pub fn prefill(init: &[&str]) -> StrInterner {
-        let rv = StrInterner::new();
-        for &v in init { rv.intern(v); }
-        rv
-    }
-
-    pub fn intern(&self, val: &str) -> Name {
-        let mut map = self.map.borrow_mut();
-        if let Some(&idx) = map.get(val) {
-            return idx;
+    pub fn prefill(init: &[&str]) -> Self {
+        let mut this = Interner::new();
+        for &string in init {
+            this.intern(string);
         }
-
-        let new_idx = Name(self.len() as u32);
-        let val = RcStr::new(val);
-        map.insert(val.clone(), new_idx);
-        self.vect.borrow_mut().push(val);
-        new_idx
-    }
-
-    pub fn gensym(&self, val: &str) -> Name {
-        let new_idx = Name(self.len() as u32);
-        // leave out of .map to avoid colliding
-        self.vect.borrow_mut().push(RcStr::new(val));
-        new_idx
+        this
     }
 
-    // I want these gensyms to share name pointers
-    // with existing entries. This would be automatic,
-    // except that the existing gensym creates its
-    // own managed ptr using to_managed. I think that
-    // adding this utility function is the most
-    // lightweight way to get what I want, though not
-    // necessarily the cleanest.
-
-    /// Create a gensym with the same name as an existing
-    /// entry.
-    pub fn gensym_copy(&self, idx : Name) -> Name {
-        let new_idx = Name(self.len() as u32);
-        // leave out of map to avoid colliding
-        let mut vect = self.vect.borrow_mut();
-        let existing = (*vect)[idx.0 as usize].clone();
-        vect.push(existing);
-        new_idx
-    }
+    pub fn intern<T: Borrow<str> + Into<String>>(&mut self, string: T) -> Name {
+        if let Some(&name) = self.names.get(string.borrow()) {
+            return name;
+        }
 
-    pub fn get(&self, idx: Name) -> RcStr {
-        (*self.vect.borrow())[idx.0 as usize].clone()
+        let name = Name(self.strings.len() as u32);
+        let string = Rc::new(string.into());
+        self.strings.push(string.clone());
+        self.names.insert(RcStr(string), name);
+        name
     }
 
-    pub fn len(&self) -> usize {
-        self.vect.borrow().len()
+    pub fn gensym(&mut self, string: &str) -> Name {
+        let gensym = Name(self.strings.len() as u32);
+        // leave out of `names` to avoid colliding
+        self.strings.push(Rc::new(string.to_owned()));
+        gensym
     }
 
-    pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
-    where RcStr: Borrow<Q>, Q: Eq + Hash {
-        match (*self.map.borrow()).get(val) {
-            Some(v) => Some(*v),
-            None => None,
-        }
+    /// Create a gensym with the same name as an existing entry.
+    pub fn gensym_copy(&mut self, name: Name) -> Name {
+        let gensym = Name(self.strings.len() as u32);
+        // leave out of `names` to avoid colliding
+        let string = self.strings[name.0 as usize].clone();
+        self.strings.push(string);
+        gensym
     }
 
-    pub fn clear(&self) {
-        *self.map.borrow_mut() = HashMap::new();
-        *self.vect.borrow_mut() = Vec::new();
+    pub fn get(&self, name: Name) -> Rc<String> {
+        self.strings[name.0 as usize].clone()
     }
 
-    pub fn reset(&self, other: StrInterner) {
-        *self.map.borrow_mut() = other.map.into_inner();
-        *self.vect.borrow_mut() = other.vect.into_inner();
+    pub fn find(&self, string: &str) -> Option<Name> {
+        self.names.get(string).cloned()
     }
 }
 
@@ -230,53 +90,8 @@ mod tests {
     use ast::Name;
 
     #[test]
-    #[should_panic]
-    fn i1 () {
-        let i : Interner<RcStr> = Interner::new();
-        i.get(Name(13));
-    }
-
-    #[test]
-    fn interner_tests () {
-        let i : Interner<RcStr> = Interner::new();
-        // first one is zero:
-        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
-        // re-use gets the same entry:
-        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
-        // different string gets a different #:
-        assert_eq!(i.intern(RcStr::new("cat")), Name(1));
-        assert_eq!(i.intern(RcStr::new("cat")), Name(1));
-        // dog is still at zero
-        assert_eq!(i.intern(RcStr::new("dog")), Name(0));
-        // gensym gets 3
-        assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2));
-        // gensym of same string gets new number :
-        assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3));
-        // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym(RcStr::new("dog")), Name(4));
-        assert_eq!(i.get(Name(0)), RcStr::new("dog"));
-        assert_eq!(i.get(Name(1)), RcStr::new("cat"));
-        assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
-        assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
-        assert_eq!(i.get(Name(4)), RcStr::new("dog"));
-    }
-
-    #[test]
-    fn i3 () {
-        let i : Interner<RcStr> = Interner::prefill(&[
-            RcStr::new("Alan"),
-            RcStr::new("Bob"),
-            RcStr::new("Carol")
-        ]);
-        assert_eq!(i.get(Name(0)), RcStr::new("Alan"));
-        assert_eq!(i.get(Name(1)), RcStr::new("Bob"));
-        assert_eq!(i.get(Name(2)), RcStr::new("Carol"));
-        assert_eq!(i.intern(RcStr::new("Bob")), Name(1));
-    }
-
-    #[test]
-    fn string_interner_tests() {
-        let i : StrInterner = StrInterner::new();
+    fn interner_tests() {
+        let mut i: Interner = Interner::new();
         // first one is zero:
         assert_eq!(i.intern("dog"), Name(0));
         // re-use gets the same entry:
@@ -294,13 +109,13 @@ mod tests {
         assert_eq!(i.gensym("dog"), Name(4));
         // gensym tests again with gensym_copy:
         assert_eq!(i.gensym_copy(Name(2)), Name(5));
-        assert_eq!(i.get(Name(5)), RcStr::new("zebra"));
+        assert_eq!(*i.get(Name(5)), "zebra");
         assert_eq!(i.gensym_copy(Name(2)), Name(6));
-        assert_eq!(i.get(Name(6)), RcStr::new("zebra"));
-        assert_eq!(i.get(Name(0)), RcStr::new("dog"));
-        assert_eq!(i.get(Name(1)), RcStr::new("cat"));
-        assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
-        assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
-        assert_eq!(i.get(Name(4)), RcStr::new("dog"));
+        assert_eq!(*i.get(Name(6)), "zebra");
+        assert_eq!(*i.get(Name(0)), "dog");
+        assert_eq!(*i.get(Name(1)), "cat");
+        assert_eq!(*i.get(Name(2)), "zebra");
+        assert_eq!(*i.get(Name(3)), "zebra");
+        assert_eq!(*i.get(Name(4)), "dog");
     }
 }