about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2025-07-22 08:52:53 +0000
committer bors <bors@rust-lang.org> 2025-07-22 08:52:53 +0000
commit 1a8eaa852507b134fcd0557547b35308273b4b74 (patch)
tree 5656ed7794f557ce02b603c2aebafeeef5e4c31a /compiler
parent 9748d87dc70a9a6725c5dbd76ce29d04752b4f90 (diff)
parent 8a5bcdde9d8668f92bd2f323898d5da1bdc5df5b (diff)
download rust-try.tar.gz
rust-try.zip
Auto merge of #144287 - nnethercote:Symbol-with_interner, r=<try> try
Introduce `Symbol::with_interner`.

It lets you get the contents of multiple symbols with a single TLS lookup and interner lock, instead of one per symbol.

r? `@ghost`
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/rustc_ast/src/ast.rs 40
-rw-r--r-- compiler/rustc_macros/src/symbols.rs 2
-rw-r--r-- compiler/rustc_resolve/src/rustdoc.rs 40
-rw-r--r-- compiler/rustc_span/src/symbol.rs 46
4 files changed, 82 insertions, 46 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index 8c2b521c560..53969e731e2 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -167,25 +167,27 @@ impl Path {
 ///
 /// Panics if `path` is empty or a segment after the first is `kw::PathRoot`.
 pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String {
-    // This is a guess at the needed capacity that works well in practice. It is slightly faster
-    // than (a) starting with an empty string, or (b) computing the exact capacity required.
-    // `8` works well because it's about the right size and jemalloc's size classes are all
-    // multiples of 8.
-    let mut iter = path.into_iter();
-    let len_hint = iter.size_hint().1.unwrap_or(1);
-    let mut s = String::with_capacity(len_hint * 8);
-
-    let first_sym = *iter.next().unwrap().borrow();
-    if first_sym != kw::PathRoot {
-        s.push_str(first_sym.as_str());
-    }
-    for sym in iter {
-        let sym = *sym.borrow();
-        debug_assert_ne!(sym, kw::PathRoot);
-        s.push_str("::");
-        s.push_str(sym.as_str());
-    }
-    s
+    Symbol::with_interner(|interner| {
+        // This is a guess at the needed capacity that works well in practice. It is slightly
+        // faster than (a) starting with an empty string, or (b) computing the exact capacity
+        // required. `8` works well because it's about the right size and jemalloc's size classes
+        // are all multiples of 8.
+        let mut iter = path.into_iter();
+        let len_hint = iter.size_hint().1.unwrap_or(1);
+
+        let mut s = String::with_capacity(len_hint * 8);
+        let first_sym = *iter.next().unwrap().borrow();
+        if first_sym != kw::PathRoot {
+            s.push_str(interner.get_str(first_sym));
+        }
+        for sym in iter {
+            let sym = *sym.borrow();
+            debug_assert_ne!(sym, kw::PathRoot);
+            s.push_str("::");
+            s.push_str(interner.get_str(sym));
+        }
+        s
+    })
 }
 
 /// Like `join_path_syms`, but for `Ident`s. This function is necessary because
diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs
index 78a4d47ca33..455324430ef 100644
--- a/compiler/rustc_macros/src/symbols.rs
+++ b/compiler/rustc_macros/src/symbols.rs
@@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
         const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
 
         /// The number of predefined symbols; this is the first index for
-        /// extra pre-interned symbols in an Interner created via
+        /// extra pre-interned symbols in an interner created via
         /// [`Interner::with_extra_symbols`].
         pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;
 
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
index 24e15ded94f..49920bdd913 100644
--- a/compiler/rustc_resolve/src/rustdoc.rs
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
     // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
     // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
     // (5 - 1) whitespaces.
-    let Some(min_indent) = docs
-        .iter()
-        .map(|fragment| {
-            fragment
-                .doc
-                .as_str()
-                .lines()
-                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
-                .map(|line| {
-                    // Compare against either space or tab, ignoring whether they are
-                    // mixed or not.
-                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
-                    whitespace
-                        + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
+    let Some(min_indent) = ({
+        Symbol::with_interner(|interner| {
+            docs.iter()
+                .map(|fragment| {
+                    interner
+                        .get_str(fragment.doc)
+                        .lines()
+                        .filter(|line| line.chars().any(|c| !c.is_whitespace()))
+                        .map(|line| {
+                            // Compare against either space or tab, ignoring whether they are
+                            // mixed or not.
+                            let whitespace =
+                                line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+                            whitespace
+                                + (if fragment.kind == DocFragmentKind::SugaredDoc {
+                                    0
+                                } else {
+                                    add
+                                })
+                        })
+                        .min()
+                        .unwrap_or(usize::MAX)
                 })
                 .min()
-                .unwrap_or(usize::MAX)
         })
-        .min()
-    else {
+    }) else {
         return;
     };
 
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index d54175548e3..f84ffab23cd 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2632,6 +2632,19 @@ impl Symbol {
         })
     }
 
+    /// Runs `f` with access to the symbol interner, so you can call
+    /// `interner.get_str(sym)` instead of `sym.as_str()`.
+    ///
+    /// This is for performance: it lets you get the contents of multiple
+    /// symbols with a single TLS lookup and interner lock operation, instead
+    /// of doing those operations once per symbol.
+    pub fn with_interner<R>(f: impl FnOnce(&InternerInner) -> R) -> R {
+        with_session_globals(|session_globals| {
+            let inner = session_globals.symbol_interner.0.lock();
+            f(&inner)
+        })
+    }
+
     pub fn as_u32(self) -> u32 {
         self.0.as_u32()
     }
@@ -2733,14 +2746,13 @@ impl<CTX> HashStable<CTX> for ByteSymbol {
 // string with identical contents (e.g. "foo" and b"foo") are both interned,
 // only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
 // will have the same index.
+//
+// There must only be one of these, otherwise it's easy to mix up symbols
+// between interners.
 pub(crate) struct Interner(Lock<InternerInner>);
 
 // The `&'static [u8]`s in this type actually point into the arena.
-//
-// This type is private to prevent accidentally constructing more than one
-// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
-// between `Interner`s.
-struct InternerInner {
+pub struct InternerInner {
     arena: DroplessArena,
     byte_strs: FxIndexSet<&'static [u8]>,
 }
@@ -2794,8 +2806,10 @@ impl Interner {
     /// Get the symbol as a string.
     ///
     /// [`Symbol::as_str()`] should be used in preference to this function.
+    /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]).
     fn get_str(&self, symbol: Symbol) -> &str {
-        let byte_str = self.get_inner(symbol.0.as_usize());
+        let inner = self.0.lock();
+        let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap();
         // SAFETY: known to be a UTF8 string because it's a `Symbol`.
         unsafe { str::from_utf8_unchecked(byte_str) }
     }
@@ -2803,12 +2817,26 @@ impl Interner {
     /// Get the symbol as a string.
     ///
     /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
+    /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]).
     fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
-        self.get_inner(symbol.0.as_usize())
+        let inner = self.0.lock();
+        inner.byte_strs.get_index(symbol.0.as_usize()).unwrap()
     }
+}
 
-    fn get_inner(&self, index: usize) -> &[u8] {
-        self.0.lock().byte_strs.get_index(index).unwrap()
+impl InternerInner {
+    /// Get the symbol as a string. Used with `with_interner`.
+    #[inline]
+    pub fn get_str(&self, symbol: Symbol) -> &str {
+        let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap();
+        // SAFETY: known to be a UTF8 string because it's a `Symbol`.
+        unsafe { str::from_utf8_unchecked(byte_str) }
+    }
+
+    /// Get the symbol as a byte string. Used with `with_interner`.
+    #[inline]
+    pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
+        self.byte_strs.get_index(symbol.0.as_usize()).unwrap()
     }
 }