diff options
| author | bors <bors@rust-lang.org> | 2016-07-13 10:26:18 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2016-07-13 10:26:18 -0700 |
| commit | 0b7fb80e1c05bee176ea68d21e19a352a106c968 (patch) | |
| tree | 0db4006f136b461d7cbdc2cd8de302e69dc1cc17 /src/libsyntax | |
| parent | 4a12a70a5c516d4aa5e86de52a62f41b67ab8bc0 (diff) | |
| parent | 060b5c5ef273a6b74ccbd10c1d4a1debfa27d9de (diff) | |
| download | rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.tar.gz rust-0b7fb80e1c05bee176ea68d21e19a352a106c968.zip | |
Auto merge of #34772 - jseyfried:cleanup_interner, r=eddyb
Start cleaning up the string interner. r? @eddyb
Diffstat (limited to 'src/libsyntax')
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 39 | ||||
| -rw-r--r-- | src/libsyntax/util/interner.rs | 283 |
3 files changed, 69 insertions, 255 deletions
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index c6374e59c1b..4656ba03e21 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -259,7 +259,6 @@ pub struct Parser<'a> { pub restrictions: Restrictions, pub quote_depth: usize, // not (yet) related to the quasiquoter pub reader: Box<Reader+'a>, - pub interner: Rc<token::IdentInterner>, /// The set of seen errors about obsolete syntax. Used to suppress /// extra detail when the same error is seen twice pub obsolete_set: HashSet<ObsoleteSyntax>, @@ -356,7 +355,6 @@ impl<'a> Parser<'a> { Parser { reader: rdr, - interner: token::get_ident_interner(), sess: sess, cfg: cfg, token: tok0.tok, diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 8376d28164d..ab7ed223bb3 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -17,11 +17,11 @@ pub use self::Token::*; use ast::{self, BinOpKind}; use ext::mtwt; use ptr::P; -use util::interner::{RcStr, StrInterner}; -use util::interner; +use util::interner::Interner; use tokenstream; use serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::cell::RefCell; use std::fmt; use std::ops::Deref; use std::rc::Rc; @@ -397,7 +397,7 @@ macro_rules! declare_keywords {( } fn mk_fresh_ident_interner() -> IdentInterner { - interner::StrInterner::prefill(&[$($string,)*]) + Interner::prefill(&[$($string,)*]) } }} @@ -473,22 +473,25 @@ declare_keywords! { } // looks like we can get rid of this completely... -pub type IdentInterner = StrInterner; +pub type IdentInterner = Interner; // if an interner exists in TLS, return it. Otherwise, prepare a // fresh one. // FIXME(eddyb) #8726 This should probably use a thread-local reference. 
-pub fn get_ident_interner() -> Rc<IdentInterner> { - thread_local!(static KEY: Rc<::parse::token::IdentInterner> = { - Rc::new(mk_fresh_ident_interner()) +pub fn with_ident_interner<T, F: FnOnce(&mut IdentInterner) -> T>(f: F) -> T { + thread_local!(static KEY: RefCell<IdentInterner> = { + RefCell::new(mk_fresh_ident_interner()) }); - KEY.with(|k| k.clone()) + KEY.with(|interner| f(&mut *interner.borrow_mut())) } /// Reset the ident interner to its initial state. pub fn reset_ident_interner() { - let interner = get_ident_interner(); - interner.reset(mk_fresh_ident_interner()); + with_ident_interner(|interner| *interner = mk_fresh_ident_interner()); +} + +pub fn clear_ident_interner() { + with_ident_interner(|interner| *interner = IdentInterner::new()); } /// Represents a string stored in the thread-local interner. Because the @@ -502,19 +505,19 @@ pub fn reset_ident_interner() { /// somehow. #[derive(Clone, PartialEq, Hash, PartialOrd, Eq, Ord)] pub struct InternedString { - string: RcStr, + string: Rc<String>, } impl InternedString { #[inline] pub fn new(string: &'static str) -> InternedString { InternedString { - string: RcStr::new(string), + string: Rc::new(string.to_owned()), } } #[inline] - fn new_from_rc_str(string: RcStr) -> InternedString { + fn new_from_rc_str(string: Rc<String>) -> InternedString { InternedString { string: string, } @@ -522,8 +525,7 @@ impl InternedString { #[inline] pub fn new_from_name(name: ast::Name) -> InternedString { - let interner = get_ident_interner(); - InternedString::new_from_rc_str(interner.get(name)) + with_ident_interner(|interner| InternedString::new_from_rc_str(interner.get(name))) } } @@ -611,13 +613,13 @@ pub fn intern_and_get_ident(s: &str) -> InternedString { /// Maps a string to its interned representation. #[inline] pub fn intern(s: &str) -> ast::Name { - get_ident_interner().intern(s) + with_ident_interner(|interner| interner.intern(s)) } /// gensym's a new usize, using the current interner. 
#[inline] pub fn gensym(s: &str) -> ast::Name { - get_ident_interner().gensym(s) + with_ident_interner(|interner| interner.gensym(s)) } /// Maps a string to an identifier with an empty syntax context. @@ -636,8 +638,7 @@ pub fn gensym_ident(s: &str) -> ast::Ident { // note that this guarantees that str_ptr_eq(ident_to_string(src),interner_get(fresh_name(src))); // that is, that the new name and the old one are connected to ptr_eq strings. pub fn fresh_name(src: ast::Ident) -> ast::Name { - let interner = get_ident_interner(); - interner.gensym_copy(src.name) + with_ident_interner(|interner| interner.gensym_copy(src.name)) // following: debug version. Could work in final except that it's incompatible with // good error messages and uses of struct names in ambiguous could-be-binding // locations. Also definitely destroys the guarantee given above about ptr_eq. diff --git a/src/libsyntax/util/interner.rs b/src/libsyntax/util/interner.rs index 7295b36af0f..6bb409715aa 100644 --- a/src/libsyntax/util/interner.rs +++ b/src/libsyntax/util/interner.rs @@ -15,212 +15,72 @@ use ast::Name; use std::borrow::Borrow; -use std::cell::RefCell; -use std::cmp::Ordering; use std::collections::HashMap; -use std::fmt; -use std::hash::Hash; -use std::ops::Deref; use std::rc::Rc; -pub struct Interner<T> { - map: RefCell<HashMap<T, Name>>, - vect: RefCell<Vec<T> >, -} - -// when traits can extend traits, we should extend index<Name,T> to get [] -impl<T: Eq + Hash + Clone + 'static> Interner<T> { - pub fn new() -> Interner<T> { - Interner { - map: RefCell::new(HashMap::new()), - vect: RefCell::new(Vec::new()), - } - } - - pub fn prefill(init: &[T]) -> Interner<T> { - let rv = Interner::new(); - for v in init { - rv.intern((*v).clone()); - } - rv - } - - pub fn intern(&self, val: T) -> Name { - let mut map = self.map.borrow_mut(); - if let Some(&idx) = (*map).get(&val) { - return idx; - } - - let mut vect = self.vect.borrow_mut(); - let new_idx = Name((*vect).len() as u32); - 
(*map).insert(val.clone(), new_idx); - (*vect).push(val); - new_idx - } - - pub fn gensym(&self, val: T) -> Name { - let mut vect = self.vect.borrow_mut(); - let new_idx = Name((*vect).len() as u32); - // leave out of .map to avoid colliding - (*vect).push(val); - new_idx - } - - pub fn get(&self, idx: Name) -> T { - let vect = self.vect.borrow(); - (*vect)[idx.0 as usize].clone() - } - - pub fn len(&self) -> usize { - let vect = self.vect.borrow(); - (*vect).len() - } - - pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> - where T: Borrow<Q>, Q: Eq + Hash { - let map = self.map.borrow(); - match (*map).get(val) { - Some(v) => Some(*v), - None => None, - } - } - - pub fn clear(&self) { - *self.map.borrow_mut() = HashMap::new(); - *self.vect.borrow_mut() = Vec::new(); - } -} - -#[derive(Clone, PartialEq, Hash, PartialOrd)] -pub struct RcStr { - string: Rc<String>, -} - -impl RcStr { - pub fn new(string: &str) -> RcStr { - RcStr { - string: Rc::new(string.to_string()), - } - } -} - -impl Eq for RcStr {} - -impl Ord for RcStr { - fn cmp(&self, other: &RcStr) -> Ordering { - self[..].cmp(&other[..]) - } -} - -impl fmt::Debug for RcStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self[..].fmt(f) - } -} - -impl fmt::Display for RcStr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self[..].fmt(f) - } -} +#[derive(PartialEq, Eq, Hash)] +struct RcStr(Rc<String>); impl Borrow<str> for RcStr { fn borrow(&self) -> &str { - &self.string[..] + &self.0 } } -impl Deref for RcStr { - type Target = str; - - fn deref(&self) -> &str { &self.string[..] } -} - -/// A StrInterner differs from Interner<String> in that it accepts -/// &str rather than RcStr, resulting in less allocation. 
-pub struct StrInterner { - map: RefCell<HashMap<RcStr, Name>>, - vect: RefCell<Vec<RcStr> >, +#[derive(Default)] +pub struct Interner { + names: HashMap<RcStr, Name>, + strings: Vec<Rc<String>>, } /// When traits can extend traits, we should extend index<Name,T> to get [] -impl StrInterner { - pub fn new() -> StrInterner { - StrInterner { - map: RefCell::new(HashMap::new()), - vect: RefCell::new(Vec::new()), - } +impl Interner { + pub fn new() -> Self { + Interner::default() } - pub fn prefill(init: &[&str]) -> StrInterner { - let rv = StrInterner::new(); - for &v in init { rv.intern(v); } - rv - } - - pub fn intern(&self, val: &str) -> Name { - let mut map = self.map.borrow_mut(); - if let Some(&idx) = map.get(val) { - return idx; + pub fn prefill(init: &[&str]) -> Self { + let mut this = Interner::new(); + for &string in init { + this.intern(string); } - - let new_idx = Name(self.len() as u32); - let val = RcStr::new(val); - map.insert(val.clone(), new_idx); - self.vect.borrow_mut().push(val); - new_idx - } - - pub fn gensym(&self, val: &str) -> Name { - let new_idx = Name(self.len() as u32); - // leave out of .map to avoid colliding - self.vect.borrow_mut().push(RcStr::new(val)); - new_idx + this } - // I want these gensyms to share name pointers - // with existing entries. This would be automatic, - // except that the existing gensym creates its - // own managed ptr using to_managed. I think that - // adding this utility function is the most - // lightweight way to get what I want, though not - // necessarily the cleanest. - - /// Create a gensym with the same name as an existing - /// entry. 
- pub fn gensym_copy(&self, idx : Name) -> Name { - let new_idx = Name(self.len() as u32); - // leave out of map to avoid colliding - let mut vect = self.vect.borrow_mut(); - let existing = (*vect)[idx.0 as usize].clone(); - vect.push(existing); - new_idx - } + pub fn intern<T: Borrow<str> + Into<String>>(&mut self, string: T) -> Name { + if let Some(&name) = self.names.get(string.borrow()) { + return name; + } - pub fn get(&self, idx: Name) -> RcStr { - (*self.vect.borrow())[idx.0 as usize].clone() + let name = Name(self.strings.len() as u32); + let string = Rc::new(string.into()); + self.strings.push(string.clone()); + self.names.insert(RcStr(string), name); + name } - pub fn len(&self) -> usize { - self.vect.borrow().len() + pub fn gensym(&mut self, string: &str) -> Name { + let gensym = Name(self.strings.len() as u32); + // leave out of `names` to avoid colliding + self.strings.push(Rc::new(string.to_owned())); + gensym } - pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> - where RcStr: Borrow<Q>, Q: Eq + Hash { - match (*self.map.borrow()).get(val) { - Some(v) => Some(*v), - None => None, - } + /// Create a gensym with the same name as an existing entry. 
+ pub fn gensym_copy(&mut self, name: Name) -> Name { + let gensym = Name(self.strings.len() as u32); + // leave out of `names` to avoid colliding + let string = self.strings[name.0 as usize].clone(); + self.strings.push(string); + gensym } - pub fn clear(&self) { - *self.map.borrow_mut() = HashMap::new(); - *self.vect.borrow_mut() = Vec::new(); + pub fn get(&self, name: Name) -> Rc<String> { + self.strings[name.0 as usize].clone() } - pub fn reset(&self, other: StrInterner) { - *self.map.borrow_mut() = other.map.into_inner(); - *self.vect.borrow_mut() = other.vect.into_inner(); + pub fn find(&self, string: &str) -> Option<Name> { + self.names.get(string).cloned() } } @@ -230,53 +90,8 @@ mod tests { use ast::Name; #[test] - #[should_panic] - fn i1 () { - let i : Interner<RcStr> = Interner::new(); - i.get(Name(13)); - } - - #[test] - fn interner_tests () { - let i : Interner<RcStr> = Interner::new(); - // first one is zero: - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // re-use gets the same entry: - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // different string gets a different #: - assert_eq!(i.intern(RcStr::new("cat")), Name(1)); - assert_eq!(i.intern(RcStr::new("cat")), Name(1)); - // dog is still at zero - assert_eq!(i.intern(RcStr::new("dog")), Name(0)); - // gensym gets 3 - assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2)); - // gensym of same string gets new number : - assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3)); - // gensym of *existing* string gets new number: - assert_eq!(i.gensym(RcStr::new("dog")), Name(4)); - assert_eq!(i.get(Name(0)), RcStr::new("dog")); - assert_eq!(i.get(Name(1)), RcStr::new("cat")); - assert_eq!(i.get(Name(2)), RcStr::new("zebra")); - assert_eq!(i.get(Name(3)), RcStr::new("zebra")); - assert_eq!(i.get(Name(4)), RcStr::new("dog")); - } - - #[test] - fn i3 () { - let i : Interner<RcStr> = Interner::prefill(&[ - RcStr::new("Alan"), - RcStr::new("Bob"), - RcStr::new("Carol") - ]); - 
assert_eq!(i.get(Name(0)), RcStr::new("Alan")); - assert_eq!(i.get(Name(1)), RcStr::new("Bob")); - assert_eq!(i.get(Name(2)), RcStr::new("Carol")); - assert_eq!(i.intern(RcStr::new("Bob")), Name(1)); - } - - #[test] - fn string_interner_tests() { - let i : StrInterner = StrInterner::new(); + fn interner_tests() { + let mut i: Interner = Interner::new(); // first one is zero: assert_eq!(i.intern("dog"), Name(0)); // re-use gets the same entry: @@ -294,13 +109,13 @@ mod tests { assert_eq!(i.gensym("dog"), Name(4)); // gensym tests again with gensym_copy: assert_eq!(i.gensym_copy(Name(2)), Name(5)); - assert_eq!(i.get(Name(5)), RcStr::new("zebra")); + assert_eq!(*i.get(Name(5)), "zebra"); assert_eq!(i.gensym_copy(Name(2)), Name(6)); - assert_eq!(i.get(Name(6)), RcStr::new("zebra")); - assert_eq!(i.get(Name(0)), RcStr::new("dog")); - assert_eq!(i.get(Name(1)), RcStr::new("cat")); - assert_eq!(i.get(Name(2)), RcStr::new("zebra")); - assert_eq!(i.get(Name(3)), RcStr::new("zebra")); - assert_eq!(i.get(Name(4)), RcStr::new("dog")); + assert_eq!(*i.get(Name(6)), "zebra"); + assert_eq!(*i.get(Name(0)), "dog"); + assert_eq!(*i.get(Name(1)), "cat"); + assert_eq!(*i.get(Name(2)), "zebra"); + assert_eq!(*i.get(Name(3)), "zebra"); + assert_eq!(*i.get(Name(4)), "dog"); } } |
