about summary refs log tree commit diff
path: root/src/libsyntax_pos
diff options
context:
space:
mode:
authorkennytm <kennytm@gmail.com>2018-12-14 22:10:09 +0800
committerGitHub <noreply@github.com>2018-12-14 22:10:09 +0800
commitdadf7fcc2dc76dd90a9f5a4eea595cf7add8ffff (patch)
treea9ba7855dc14b44d546cc4b0d2f3509edd68e80e /src/libsyntax_pos
parent53a2de2d549f8f3af99de7c05fa8caa7aa2f2949 (diff)
parent0f68749260d04b61dfe2b38dbc423aefe2914a58 (diff)
downloadrust-dadf7fcc2dc76dd90a9f5a4eea595cf7add8ffff.tar.gz
rust-dadf7fcc2dc76dd90a9f5a4eea595cf7add8ffff.zip
Rollup merge of #56699 - nnethercote:SymbolIndex, r=oli-obk
Use a `newtype_index!` within `Symbol`.

This shrinks `Option<Symbol>` from 8 bytes to 4 bytes, which shrinks
`Token` from 24 bytes to 16 bytes. This reduces instruction counts by up
to 1% across a range of benchmarks.

r? @oli-obk
Diffstat (limited to 'src/libsyntax_pos')
-rw-r--r--src/libsyntax_pos/lib.rs3
-rw-r--r--src/libsyntax_pos/symbol.rs61
2 files changed, 42 insertions, 22 deletions
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs
index 8b7ffa499cd..9aafb9fc549 100644
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@@ -24,10 +24,13 @@
 #![feature(nll)]
 #![feature(non_exhaustive)]
 #![feature(optin_builtin_traits)]
+#![feature(rustc_attrs)]
 #![feature(specialization)]
+#![feature(step_trait)]
 #![cfg_attr(not(stage0), feature(stdsimd))]
 
 extern crate arena;
+#[macro_use]
 extern crate rustc_data_structures;
 
 #[macro_use]
diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs
index f1adb9d64da..b720db899a1 100644
--- a/src/libsyntax_pos/symbol.rs
+++ b/src/libsyntax_pos/symbol.rs
@@ -14,6 +14,7 @@
 
 use arena::DroplessArena;
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::indexed_vec::Idx;
 use serialize::{Decodable, Decoder, Encodable, Encoder};
 
 use std::fmt;
@@ -143,9 +144,18 @@ impl Decodable for Ident {
     }
 }
 
-/// A symbol is an interned or gensymed string.
+/// A symbol is an interned or gensymed string. The use of newtype_index! means
+/// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
+/// the last 256 values for tagging purposes.
+///
+/// Note that Symbol cannot be a newtype_index! directly because it implements
+/// fmt::Debug, Encodable, and Decodable in special ways.
 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Symbol(u32);
+pub struct Symbol(SymbolIndex);
+
+newtype_index! {
+    pub struct SymbolIndex { .. }
+}
 
 // The interner is pointed to by a thread local value which is only set on the main thread
 // with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +166,10 @@ impl !Send for Symbol { }
 impl !Sync for Symbol { }
 
 impl Symbol {
+    const fn new(n: u32) -> Self {
+        Symbol(SymbolIndex::from_u32_const(n))
+    }
+
     /// Maps a string to its interned representation.
     pub fn intern(string: &str) -> Self {
         with_interner(|interner| interner.intern(string))
@@ -189,7 +203,7 @@ impl Symbol {
     }
 
     pub fn as_u32(self) -> u32 {
-        self.0
+        self.0.as_u32()
     }
 }
 
@@ -197,7 +211,7 @@ impl fmt::Debug for Symbol {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
         if is_gensymed {
-            write!(f, "{}({})", self, self.0)
+            write!(f, "{}({:?})", self, self.0)
         } else {
             write!(f, "{}", self)
         }
@@ -229,6 +243,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
 }
 
 // The `&'static str`s in this type actually point into the arena.
+//
+// Note that normal symbols are indexed upward from 0, and gensyms are indexed
+// downward from SymbolIndex::MAX_AS_U32.
 #[derive(Default)]
 pub struct Interner {
     arena: DroplessArena,
@@ -243,7 +260,7 @@ impl Interner {
         for &string in init {
             if string == "" {
                 // We can't allocate empty strings in the arena, so handle this here.
-                let name = Symbol(this.strings.len() as u32);
+                let name = Symbol::new(this.strings.len() as u32);
                 this.names.insert("", name);
                 this.strings.push("");
             } else {
@@ -258,7 +275,7 @@ impl Interner {
             return name;
         }
 
-        let name = Symbol(self.strings.len() as u32);
+        let name = Symbol::new(self.strings.len() as u32);
 
         // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
         // UTF-8.
@@ -276,10 +293,10 @@ impl Interner {
     }
 
     pub fn interned(&self, symbol: Symbol) -> Symbol {
-        if (symbol.0 as usize) < self.strings.len() {
+        if (symbol.0.as_usize()) < self.strings.len() {
             symbol
         } else {
-            self.interned(self.gensyms[(!0 - symbol.0) as usize])
+            self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
         }
     }
 
@@ -290,17 +307,17 @@ impl Interner {
 
     fn gensymed(&mut self, symbol: Symbol) -> Symbol {
         self.gensyms.push(symbol);
-        Symbol(!0 - self.gensyms.len() as u32 + 1)
+        Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
     }
 
     fn is_gensymed(&mut self, symbol: Symbol) -> bool {
-        symbol.0 as usize >= self.strings.len()
+        symbol.0.as_usize() >= self.strings.len()
     }
 
     pub fn get(&self, symbol: Symbol) -> &str {
-        match self.strings.get(symbol.0 as usize) {
+        match self.strings.get(symbol.0.as_usize()) {
             Some(string) => string,
-            None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
+            None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
         }
     }
 }
@@ -324,7 +341,7 @@ macro_rules! declare_keywords {(
         $(
             #[allow(non_upper_case_globals)]
             pub const $konst: Keyword = Keyword {
-                ident: Ident::with_empty_ctxt(super::Symbol($index))
+                ident: Ident::with_empty_ctxt(super::Symbol::new($index))
             };
         )*
 
@@ -709,19 +726,19 @@ mod tests {
     fn interner_tests() {
         let mut i: Interner = Interner::default();
         // first one is zero:
-        assert_eq!(i.intern("dog"), Symbol(0));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
         // re-use gets the same entry:
-        assert_eq!(i.intern("dog"), Symbol(0));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
         // different string gets a different #:
-        assert_eq!(i.intern("cat"), Symbol(1));
-        assert_eq!(i.intern("cat"), Symbol(1));
+        assert_eq!(i.intern("cat"), Symbol::new(1));
+        assert_eq!(i.intern("cat"), Symbol::new(1));
         // dog is still at zero
-        assert_eq!(i.intern("dog"), Symbol(0));
-        assert_eq!(i.gensym("zebra"), Symbol(4294967295));
-        // gensym of same string gets new number :
-        assert_eq!(i.gensym("zebra"), Symbol(4294967294));
+        assert_eq!(i.intern("dog"), Symbol::new(0));
+        assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
+        // gensym of same string gets new number:
+        assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
         // gensym of *existing* string gets new number:
-        assert_eq!(i.gensym("dog"), Symbol(4294967293));
+        assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
     }
 
     #[test]