diff options
Diffstat (limited to 'src/libsyntax_pos/symbol.rs')
| -rw-r--r-- | src/libsyntax_pos/symbol.rs | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs new file mode 100644 index 00000000000..b866652c49f --- /dev/null +++ b/src/libsyntax_pos/symbol.rs @@ -0,0 +1,389 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! An "interner" is a data structure that associates values with usize tags and +//! allows bidirectional lookup; i.e. given a value, one can easily find the +//! type, and vice versa. + +use hygiene::SyntaxContext; + +use serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::cell::RefCell; +use std::collections::HashMap; +use std::fmt; + +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Ident { + pub name: Symbol, + pub ctxt: SyntaxContext, +} + +impl Ident { + pub const fn with_empty_ctxt(name: Symbol) -> Ident { + Ident { name: name, ctxt: SyntaxContext::empty() } + } + + /// Maps a string to an identifier with an empty syntax context. + pub fn from_str(string: &str) -> Ident { + Ident::with_empty_ctxt(Symbol::intern(string)) + } + + pub fn unhygienize(self) -> Ident { + Ident { name: self.name, ctxt: SyntaxContext::empty() } + } +} + +impl fmt::Debug for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}{:?}", self.name, self.ctxt) + } +} + +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.name, f) + } +} + +impl Encodable for Ident { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + self.name.encode(s) + } +} + +impl Decodable for Ident { + fn decode<D: Decoder>(d: &mut D) -> Result<Ident, D::Error> { + Ok(Ident::with_empty_ctxt(Symbol::decode(d)?)) + } +} + +/// A symbol is an interned or gensymed string. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Symbol(u32); + +// The interner in thread-local, so `Symbol` shouldn't move between threads. +impl !Send for Symbol { } + +impl Symbol { + /// Maps a string to its interned representation. + pub fn intern(string: &str) -> Self { + with_interner(|interner| interner.intern(string)) + } + + /// gensym's a new usize, using the current interner. + pub fn gensym(string: &str) -> Self { + with_interner(|interner| interner.gensym(string)) + } + + pub fn as_str(self) -> InternedString { + with_interner(|interner| unsafe { + InternedString { + string: ::std::mem::transmute::<&str, &str>(interner.get(self)) + } + }) + } + + pub fn as_u32(self) -> u32 { + self.0 + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}({})", self, self.0) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.as_str(), f) + } +} + +impl Encodable for Symbol { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_str(&self.as_str()) + } +} + +impl Decodable for Symbol { + fn decode<D: Decoder>(d: &mut D) -> Result<Symbol, D::Error> { + Ok(Symbol::intern(&d.read_str()?)) + } +} + +impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol { + fn eq(&self, other: &T) -> bool { + self.as_str() == other.deref() + } +} + +#[derive(Default)] +pub struct Interner { + names: HashMap<Box<str>, Symbol>, + strings: Vec<Box<str>>, +} + +impl Interner { + pub fn new() -> Self { + Interner::default() + } + + fn prefill(init: &[&str]) -> Self { + let mut this = Interner::new(); + for &string in init { + this.intern(string); + } + this + } + + pub fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol(self.strings.len() as u32); + let string = string.to_string().into_boxed_str(); + self.strings.push(string.clone()); + self.names.insert(string, name); + name + } + + fn gensym(&mut self, string: &str) -> Symbol { + let gensym = Symbol(self.strings.len() as u32); + // leave out of `names` to avoid colliding + self.strings.push(string.to_string().into_boxed_str()); + gensym + } + + pub fn get(&self, name: Symbol) -> &str { + &self.strings[name.0 as usize] + } +} + +// In this macro, there is the requirement that the name (the number) must be monotonically +// increasing by one in the special identifiers, starting at 0; the same holds for the keywords, +// except starting from the next number instead of zero. +macro_rules! declare_keywords {( + $( ($index: expr, $konst: ident, $string: expr) )* +) => { + pub mod keywords { + use super::{Symbol, Ident}; + #[derive(Clone, Copy, PartialEq, Eq)] + pub struct Keyword { + ident: Ident, + } + impl Keyword { + #[inline] pub fn ident(self) -> Ident { self.ident } + #[inline] pub fn name(self) -> Symbol { self.ident.name } + } + $( + #[allow(non_upper_case_globals)] + pub const $konst: Keyword = Keyword { + ident: Ident::with_empty_ctxt(super::Symbol($index)) + }; + )* + } + + impl Interner { + fn fresh() -> Self { + Interner::prefill(&[$($string,)*]) + } + } +}} + +// NB: leaving holes in the ident table is bad! a different ident will get +// interned with the id from the hole, but it will be between the min and max +// of the reserved words, and thus tagged as "reserved". +// After modifying this list adjust `is_strict_keyword`/`is_reserved_keyword`, +// this should be rarely necessary though if the keywords are kept in alphabetic order. +declare_keywords! { + // Invalid identifier + (0, Invalid, "") + + // Strict keywords used in the language. + (1, As, "as") + (2, Box, "box") + (3, Break, "break") + (4, Const, "const") + (5, Continue, "continue") + (6, Crate, "crate") + (7, Else, "else") + (8, Enum, "enum") + (9, Extern, "extern") + (10, False, "false") + (11, Fn, "fn") + (12, For, "for") + (13, If, "if") + (14, Impl, "impl") + (15, In, "in") + (16, Let, "let") + (17, Loop, "loop") + (18, Match, "match") + (19, Mod, "mod") + (20, Move, "move") + (21, Mut, "mut") + (22, Pub, "pub") + (23, Ref, "ref") + (24, Return, "return") + (25, SelfValue, "self") + (26, SelfType, "Self") + (27, Static, "static") + (28, Struct, "struct") + (29, Super, "super") + (30, Trait, "trait") + (31, True, "true") + (32, Type, "type") + (33, Unsafe, "unsafe") + (34, Use, "use") + (35, Where, "where") + (36, While, "while") + + // Keywords reserved for future use. + (37, Abstract, "abstract") + (38, Alignof, "alignof") + (39, Become, "become") + (40, Do, "do") + (41, Final, "final") + (42, Macro, "macro") + (43, Offsetof, "offsetof") + (44, Override, "override") + (45, Priv, "priv") + (46, Proc, "proc") + (47, Pure, "pure") + (48, Sizeof, "sizeof") + (49, Typeof, "typeof") + (50, Unsized, "unsized") + (51, Virtual, "virtual") + (52, Yield, "yield") + + // Weak keywords, have special meaning only in specific contexts. + (53, Default, "default") + (54, StaticLifetime, "'static") + (55, Union, "union") + (56, Catch, "catch") + + // A virtual keyword that resolves to the crate root when used in a lexical scope. + (57, CrateRoot, "{{root}}") +} + +// If an interner exists in TLS, return it. Otherwise, prepare a fresh one. +fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { + thread_local!(static INTERNER: RefCell<Interner> = { + RefCell::new(Interner::fresh()) + }); + INTERNER.with(|interner| f(&mut *interner.borrow_mut())) +} + +/// Represents a string stored in the thread-local interner. Because the +/// interner lives for the life of the thread, this can be safely treated as an +/// immortal string, as long as it never crosses between threads. +/// +/// FIXME(pcwalton): You must be careful about what you do in the destructors +/// of objects stored in TLS, because they may run after the interner is +/// destroyed. In particular, they must not access string contents. This can +/// be fixed in the future by just leaking all strings until thread death +/// somehow. +#[derive(Clone, Hash, PartialOrd, Eq, Ord)] +pub struct InternedString { + string: &'static str, +} + +impl<U: ?Sized> ::std::convert::AsRef<U> for InternedString where str: ::std::convert::AsRef<U> { + fn as_ref(&self) -> &U { + self.string.as_ref() + } +} + +impl<T: ::std::ops::Deref<Target = str>> ::std::cmp::PartialEq<T> for InternedString { + fn eq(&self, other: &T) -> bool { + self.string == other.deref() + } +} + +impl ::std::cmp::PartialEq<InternedString> for str { + fn eq(&self, other: &InternedString) -> bool { + self == other.string + } +} + +impl<'a> ::std::cmp::PartialEq<InternedString> for &'a str { + fn eq(&self, other: &InternedString) -> bool { + *self == other.string + } +} + +impl ::std::cmp::PartialEq<InternedString> for String { + fn eq(&self, other: &InternedString) -> bool { + self == other.string + } +} + +impl<'a> ::std::cmp::PartialEq<InternedString> for &'a String { + fn eq(&self, other: &InternedString) -> bool { + *self == other.string + } +} + +impl !Send for InternedString { } + +impl ::std::ops::Deref for InternedString { + type Target = str; + fn deref(&self) -> &str { self.string } +} + +impl fmt::Debug for InternedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.string, f) + } +} + +impl fmt::Display for InternedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.string, f) + } +} + +impl Decodable for InternedString { + fn decode<D: Decoder>(d: &mut D) -> Result<InternedString, D::Error> { + Ok(Symbol::intern(&d.read_str()?).as_str()) + } +} + +impl Encodable for InternedString { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_str(self.string) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn interner_tests() { + let mut i: Interner = Interner::new(); + // first one is zero: + assert_eq!(i.intern("dog"), Symbol(0)); + // re-use gets the same entry: + assert_eq!(i.intern ("dog"), Symbol(0)); + // different string gets a different #: + assert_eq!(i.intern("cat"), Symbol(1)); + assert_eq!(i.intern("cat"), Symbol(1)); + // dog is still at zero + assert_eq!(i.intern("dog"), Symbol(0)); + // gensym gets 3 + assert_eq!(i.gensym("zebra"), Symbol(2)); + // gensym of same string gets new number : + assert_eq!(i.gensym("zebra"), Symbol(3)); + // gensym of *existing* string gets new number: + assert_eq!(i.gensym("dog"), Symbol(4)); + } +} |
