diff options
| author | bors <bors@rust-lang.org> | 2016-07-13 10:26:18 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-07-13 10:26:18 -0700 |
| commit | 0b7fb80e1c05bee176ea68d21e19a352a106c968 (patch) | |
| tree | 0db4006f136b461d7cbdc2cd8de302e69dc1cc17 /src/libsyntax/util | |
| parent | 4a12a70a5c516d4aa5e86de52a62f41b67ab8bc0 (diff) | |
| parent | 060b5c5ef273a6b74ccbd10c1d4a1debfa27d9de (diff) | |
| download | rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.tar.gz rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.zip | |
Auto merge of #34772 - jseyfried:cleanup_interner, r=eddyb
Start cleaning up the string interner r? @eddyb
Diffstat (limited to 'src/libsyntax/util')
| -rw-r--r-- | src/libsyntax/util/interner.rs | 283 |
1 files changed, 49 insertions, 234 deletions
diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index 7295b36af0f..6bb409715aa 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -15,212 +15,72 @@ use ast::Name; use std::borrow::Borrow; -use std::cell::RefCell; -use std::cmp::Ordering; use std::collections::HashMap; -use std::fmt; -use std::hash::Hash; -use std::ops::Deref; use std::rc::Rc; -pub struct Interner<T> { - map: RefCell<HashMap<T, Name>>, - vect: RefCell<Vec<T> >, -} - -// when traits can extend traits, we should extend index<Name,T> to get [] -impl<T: Eq + Hash + Clone + 'static> Interner<T> { - pub fn new() -> Interner<T> { - Interner { - map: RefCell::new(HashMap::new()), - vect: RefCell::new(Vec::new()), - } - } - - pub fn prefill(init: &[T]) -> Interner<T> { - let rv = Interner::new(); - for v in init { - rv.intern((*v).clone()); - } - rv - } - - pub fn intern(&self, val: T) -> Name { - let mut map = self.map.borrow_mut(); - if let Some(&idx) = (*map).get(&val) { - return idx; - } - - let mut vect = self.vect.borrow_mut(); - let new_idx = Name((*vect).len() as u32); - (*map).insert(val.clone(), new_idx); - (*vect).push(val); - new_idx - } - - pub fn gensym(&self, val: T) -> Name { - let mut vect = self.vect.borrow_mut(); - let new_idx = Name((*vect).len() as u32); - // leave out of .map to avoid colliding - (*vect).push(val); - new_idx - } - - pub fn get(&self, idx: Name) -> T { - let vect = self.vect.borrow(); - (*vect)[idx.0 as usize].clone() - } - - pub fn len(&self) -> usize { - let vect = self.vect.borrow(); - (*vect).len() - } - - pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> - where T: Borrow<Q>, Q: Eq + Hash { - let map = self.map.borrow(); - match (*map).get(val) { - Some(v) => Some(*v), - None => None, - } - } - - pub fn clear(&self) { - *self.map.borrow_mut() = HashMap::new(); - *self.vect.borrow_mut() = Vec::new(); - } -} - -#[derive(Clone, PartialEq, Hash, PartialOrd)] -pub struct RcStr { - string: Rc<String>, -} - -impl RcStr { - pub fn new(string: &str) -> RcStr { - RcStr { - string: Rc::new(string.to_string()), - } - } -} - -impl Eq for RcStr {} - -impl Ord for RcStr { - fn cmp(&self, other: &RcStr) -> Ordering { - self[..].cmp(&other[..]) - } -} - -impl fmt::Debug for RcStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self[..].fmt(f) - } -} - -impl fmt::Display for RcStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self[..].fmt(f) - } -} +#[derive(PartialEq, Eq, Hash)] +struct RcStr(Rc<String>); impl Borrow<str> for RcStr { fn borrow(&self) -> &str { - &self.string[..] + &self.0 } } -impl Deref for RcStr { - type Target = str; - - fn deref(&self) -> &str { &self.string[..] } -} - -/// A StrInterner differs from Interner<String> in that it accepts -/// &str rather than RcStr, resulting in less allocation. -pub struct StrInterner { - map: RefCell<HashMap<RcStr, Name>>, - vect: RefCell<Vec<RcStr> >, +#[derive(Default)] +pub struct Interner { + names: HashMap<RcStr, Name>, + strings: Vec<Rc<String>>, } /// When traits can extend traits, we should extend index<Name,T> to get [] -impl StrInterner { - pub fn new() -> StrInterner { - StrInterner { - map: RefCell::new(HashMap::new()), - vect: RefCell::new(Vec::new()), - } +impl Interner { + pub fn new() -> Self { + Interner::default() } - pub fn prefill(init: &[&str]) -> StrInterner { - let rv = StrInterner::new(); - for &v in init { rv.intern(v); } - rv - } - - pub fn intern(&self, val: &str) -> Name { - let mut map = self.map.borrow_mut(); - if let Some(&idx) = map.get(val) { - return idx; + pub fn prefill(init: &[&str]) -> Self { + let mut this = Interner::new(); + for &string in init { + this.intern(string); } - - let new_idx = Name(self.len() as u32); - let val = RcStr::new(val); - map.insert(val.clone(), new_idx); - self.vect.borrow_mut().push(val); - new_idx - } - - pub fn gensym(&self, val: &str) -> Name { - let new_idx = Name(self.len() as u32); - // leave out of .map to avoid colliding - self.vect.borrow_mut().push(RcStr::new(val)); - new_idx + this } - // I want these gensyms to share name pointers - // with existing entries. This would be automatic, - // except that the existing gensym creates its - // own managed ptr using to_managed. I think that - // adding this utility function is the most - // lightweight way to get what I want, though not - // necessarily the cleanest. - - /// Create a gensym with the same name as an existing - /// entry. - pub fn gensym_copy(&self, idx : Name) -> Name { - let new_idx = Name(self.len() as u32); - // leave out of map to avoid colliding - let mut vect = self.vect.borrow_mut(); - let existing = (*vect)[idx.0 as usize].clone(); - vect.push(existing); - new_idx - } + pub fn intern<T: Borrow<str> + Into<String>>(&mut self, string: T) -> Name { + if let Some(&name) = self.names.get(string.borrow()) { + return name; + } - pub fn get(&self, idx: Name) -> RcStr { - (*self.vect.borrow())[idx.0 as usize].clone() + let name = Name(self.strings.len() as u32); + let string = Rc::new(string.into()); + self.strings.push(string.clone()); + self.names.insert(RcStr(string), name); + name } - pub fn len(&self) -> usize { - self.vect.borrow().len() + pub fn gensym(&mut self, string: &str) -> Name { + let gensym = Name(self.strings.len() as u32); + // leave out of `names` to avoid colliding + self.strings.push(Rc::new(string.to_owned())); + gensym } - pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> - where RcStr: Borrow<Q>, Q: Eq + Hash { - match (*self.map.borrow()).get(val) { - Some(v) => Some(*v), - None => None, - } + /// Create a gensym with the same name as an existing entry. + pub fn gensym_copy(&mut self, name: Name) -> Name { + let gensym = Name(self.strings.len() as u32); + // leave out of `names` to avoid colliding + let string = self.strings[name.0 as usize].clone(); + self.strings.push(string); + gensym } - pub fn clear(&self) { - *self.map.borrow_mut() = HashMap::new(); - *self.vect.borrow_mut() = Vec::new(); + pub fn get(&self, name: Name) -> Rc<String> { + self.strings[name.0 as usize].clone() } - pub fn reset(&self, other: StrInterner) { - *self.map.borrow_mut() = other.map.into_inner(); - *self.vect.borrow_mut() = other.vect.into_inner(); + pub fn find(&self, string: &str) -> Option<Name> { + self.names.get(string).cloned() } } @@ -230,53 +90,8 @@ mod tests { use ast::Name; #[test] - #[should_panic] - fn i1 () { - let i : Interner<RcStr> = Interner::new(); - i.get(Name(13)); - } - - #[test] - fn interner_tests () { - let i : Interner<RcStr> = Interner::new(); - // first one is zero: - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // re-use gets the same entry: - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // different string gets a different #: - assert_eq!(i.intern(RcStr::new("cat")), Name(1)); - assert_eq!(i.intern(RcStr::new("cat")), Name(1)); - // dog is still at zero - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // gensym gets 3 - assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2)); - // gensym of same string gets new number : - assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3)); - // gensym of *existing* string gets new number: - assert_eq!(i.gensym(RcStr::new("dog")), Name(4)); - assert_eq!(i.get(Name(0)), RcStr::new("dog")); - assert_eq!(i.get(Name(1)), RcStr::new("cat")); - assert_eq!(i.get(Name(2)), RcStr::new("zebra")); - assert_eq!(i.get(Name(3)), RcStr::new("zebra")); - assert_eq!(i.get(Name(4)), RcStr::new("dog")); - } - - #[test] - fn i3 () { - let i : Interner<RcStr> = Interner::prefill(&[ - RcStr::new("Alan"), - RcStr::new("Bob"), - RcStr::new("Carol") - ]); - assert_eq!(i.get(Name(0)), RcStr::new("Alan")); - assert_eq!(i.get(Name(1)), RcStr::new("Bob")); - assert_eq!(i.get(Name(2)), RcStr::new("Carol")); - assert_eq!(i.intern(RcStr::new("Bob")), Name(1)); - } - - #[test] - fn string_interner_tests() { - let i : StrInterner = StrInterner::new(); + fn interner_tests() { + let mut i: Interner = Interner::new(); // first one is zero: assert_eq!(i.intern("dog"), Name(0)); // re-use gets the same entry: @@ -294,13 +109,13 @@ mod tests { assert_eq!(i.gensym("dog"), Name(4)); // gensym tests again with gensym_copy: assert_eq!(i.gensym_copy(Name(2)), Name(5)); - assert_eq!(i.get(Name(5)), RcStr::new("zebra")); + assert_eq!(*i.get(Name(5)), "zebra"); assert_eq!(i.gensym_copy(Name(2)), Name(6)); - assert_eq!(i.get(Name(6)), RcStr::new("zebra")); - assert_eq!(i.get(Name(0)), RcStr::new("dog")); - assert_eq!(i.get(Name(1)), RcStr::new("cat")); - assert_eq!(i.get(Name(2)), RcStr::new("zebra")); - assert_eq!(i.get(Name(3)), RcStr::new("zebra")); - assert_eq!(i.get(Name(4)), RcStr::new("dog")); + assert_eq!(*i.get(Name(6)), "zebra"); + assert_eq!(*i.get(Name(0)), "dog"); + assert_eq!(*i.get(Name(1)), "cat"); + assert_eq!(*i.get(Name(2)), "zebra"); + assert_eq!(*i.get(Name(3)), "zebra"); + assert_eq!(*i.get(Name(4)), "dog"); } } |
