about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2025-07-22 08:52:53 +0000
committerbors <bors@rust-lang.org>2025-07-22 08:52:53 +0000
commit1a8eaa852507b134fcd0557547b35308273b4b74 (patch)
tree5656ed7794f557ce02b603c2aebafeeef5e4c31a
parent9748d87dc70a9a6725c5dbd76ce29d04752b4f90 (diff)
parent8a5bcdde9d8668f92bd2f323898d5da1bdc5df5b (diff)
downloadrust-try.tar.gz
rust-try.zip
Auto merge of #144287 - nnethercote:Symbol-with_interner, r=<try> try
Introduce `Symbol::with_interner`.

It lets you get the contents of multiple symbols with a single TLS lookup and interner lock, instead of one per symbol.

r? `@ghost`
-rw-r--r--compiler/rustc_ast/src/ast.rs40
-rw-r--r--compiler/rustc_macros/src/symbols.rs2
-rw-r--r--compiler/rustc_resolve/src/rustdoc.rs40
-rw-r--r--compiler/rustc_span/src/symbol.rs46
-rw-r--r--src/librustdoc/html/render/print_item.rs17
-rw-r--r--src/librustdoc/html/render/search_index.rs16
-rw-r--r--src/librustdoc/html/url_parts_builder.rs17
7 files changed, 111 insertions, 67 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index 8c2b521c560..53969e731e2 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -167,25 +167,27 @@ impl Path {
 ///
 /// Panics if `path` is empty or a segment after the first is `kw::PathRoot`.
 pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String {
-    // This is a guess at the needed capacity that works well in practice. It is slightly faster
-    // than (a) starting with an empty string, or (b) computing the exact capacity required.
-    // `8` works well because it's about the right size and jemalloc's size classes are all
-    // multiples of 8.
-    let mut iter = path.into_iter();
-    let len_hint = iter.size_hint().1.unwrap_or(1);
-    let mut s = String::with_capacity(len_hint * 8);
-
-    let first_sym = *iter.next().unwrap().borrow();
-    if first_sym != kw::PathRoot {
-        s.push_str(first_sym.as_str());
-    }
-    for sym in iter {
-        let sym = *sym.borrow();
-        debug_assert_ne!(sym, kw::PathRoot);
-        s.push_str("::");
-        s.push_str(sym.as_str());
-    }
-    s
+    Symbol::with_interner(|interner| {
+        // This is a guess at the needed capacity that works well in practice. It is slightly
+        // faster than (a) starting with an empty string, or (b) computing the exact capacity
+        // required. `8` works well because it's about the right size and jemalloc's size classes
+        // are all multiples of 8.
+        let mut iter = path.into_iter();
+        let len_hint = iter.size_hint().1.unwrap_or(1);
+
+        let mut s = String::with_capacity(len_hint * 8);
+        let first_sym = *iter.next().unwrap().borrow();
+        if first_sym != kw::PathRoot {
+            s.push_str(interner.get_str(first_sym));
+        }
+        for sym in iter {
+            let sym = *sym.borrow();
+            debug_assert_ne!(sym, kw::PathRoot);
+            s.push_str("::");
+            s.push_str(interner.get_str(sym));
+        }
+        s
+    })
 }
 
 /// Like `join_path_syms`, but for `Ident`s. This function is necessary because
diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs
index 78a4d47ca33..455324430ef 100644
--- a/compiler/rustc_macros/src/symbols.rs
+++ b/compiler/rustc_macros/src/symbols.rs
@@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
         const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
 
         /// The number of predefined symbols; this is the first index for
-        /// extra pre-interned symbols in an Interner created via
+        /// extra pre-interned symbols in an interner created via
         /// [`Interner::with_extra_symbols`].
         pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;
 
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
index 24e15ded94f..49920bdd913 100644
--- a/compiler/rustc_resolve/src/rustdoc.rs
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
     // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
     // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
     // (5 - 1) whitespaces.
-    let Some(min_indent) = docs
-        .iter()
-        .map(|fragment| {
-            fragment
-                .doc
-                .as_str()
-                .lines()
-                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
-                .map(|line| {
-                    // Compare against either space or tab, ignoring whether they are
-                    // mixed or not.
-                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
-                    whitespace
-                        + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
+    let Some(min_indent) = ({
+        Symbol::with_interner(|interner| {
+            docs.iter()
+                .map(|fragment| {
+                    interner
+                        .get_str(fragment.doc)
+                        .lines()
+                        .filter(|line| line.chars().any(|c| !c.is_whitespace()))
+                        .map(|line| {
+                            // Compare against either space or tab, ignoring whether they are
+                            // mixed or not.
+                            let whitespace =
+                                line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+                            whitespace
+                                + (if fragment.kind == DocFragmentKind::SugaredDoc {
+                                    0
+                                } else {
+                                    add
+                                })
+                        })
+                        .min()
+                        .unwrap_or(usize::MAX)
                 })
                 .min()
-                .unwrap_or(usize::MAX)
         })
-        .min()
-    else {
+    }) else {
         return;
     };
 
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index d54175548e3..f84ffab23cd 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -2632,6 +2632,19 @@ impl Symbol {
         })
     }
 
+    /// Runs `f` with access to the symbol interner, so you can call
+    /// `interner.get_str(sym)` instead of `sym.as_str()`.
+    ///
+    /// This is for performance: it lets you get the contents of multiple
+    /// symbols with a single TLS lookup and interner lock operation, instead
+    /// of doing those operations once per symbol.
+    pub fn with_interner<R>(f: impl FnOnce(&InternerInner) -> R) -> R {
+        with_session_globals(|session_globals| {
+            let inner = session_globals.symbol_interner.0.lock();
+            f(&inner)
+        })
+    }
+
     pub fn as_u32(self) -> u32 {
         self.0.as_u32()
     }
@@ -2733,14 +2746,13 @@ impl<CTX> HashStable<CTX> for ByteSymbol {
 // string with identical contents (e.g. "foo" and b"foo") are both interned,
 // only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
 // will have the same index.
+//
+// There must only be one of these, otherwise its easy to mix up symbols
+// between interners.
 pub(crate) struct Interner(Lock<InternerInner>);
 
 // The `&'static [u8]`s in this type actually point into the arena.
-//
-// This type is private to prevent accidentally constructing more than one
-// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
-// between `Interner`s.
-struct InternerInner {
+pub struct InternerInner {
     arena: DroplessArena,
     byte_strs: FxIndexSet<&'static [u8]>,
 }
@@ -2794,8 +2806,10 @@ impl Interner {
     /// Get the symbol as a string.
     ///
     /// [`Symbol::as_str()`] should be used in preference to this function.
+    /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]).
     fn get_str(&self, symbol: Symbol) -> &str {
-        let byte_str = self.get_inner(symbol.0.as_usize());
+        let inner = self.0.lock();
+        let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap();
         // SAFETY: known to be a UTF8 string because it's a `Symbol`.
         unsafe { str::from_utf8_unchecked(byte_str) }
     }
@@ -2803,12 +2817,26 @@ impl Interner {
     /// Get the symbol as a string.
     ///
     /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
+    /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]).
     fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
-        self.get_inner(symbol.0.as_usize())
+        let inner = self.0.lock();
+        inner.byte_strs.get_index(symbol.0.as_usize()).unwrap()
     }
+}
 
-    fn get_inner(&self, index: usize) -> &[u8] {
-        self.0.lock().byte_strs.get_index(index).unwrap()
+impl InternerInner {
+    /// Get the symbol as a string. Used with `with_interner`.
+    #[inline]
+    pub fn get_str(&self, symbol: Symbol) -> &str {
+        let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap();
+        // SAFETY: known to be a UTF8 string because it's a `Symbol`.
+        unsafe { str::from_utf8_unchecked(byte_str) }
+    }
+
+    /// Get the symbol as a string. Used with `with_interner`.
+    #[inline]
+    pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
+        self.byte_strs.get_index(symbol.0.as_usize()).unwrap()
     }
 }
 
diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs
index 02ee34aaac6..aca2eb53c11 100644
--- a/src/librustdoc/html/render/print_item.rs
+++ b/src/librustdoc/html/render/print_item.rs
@@ -12,7 +12,7 @@ use rustc_hir::def_id::DefId;
 use rustc_index::IndexVec;
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_span::hygiene::MacroKind;
-use rustc_span::symbol::{Symbol, sym};
+use rustc_span::symbol::{InternerInner, Symbol, sym};
 use tracing::{debug, info};
 
 use super::type_layout::document_type_layout;
@@ -333,7 +333,12 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
             }
         }
 
-        fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering {
+        fn cmp(
+            i1: &clean::Item,
+            i2: &clean::Item,
+            interner: &InternerInner,
+            tcx: TyCtxt<'_>,
+        ) -> Ordering {
             let rty1 = reorder(i1.type_());
             let rty2 = reorder(i2.type_());
             if rty1 != rty2 {
@@ -349,7 +354,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
                 return is_stable2.cmp(&is_stable1);
             }
             match (i1.name, i2.name) {
-                (Some(name1), Some(name2)) => compare_names(name1.as_str(), name2.as_str()),
+                (Some(name1), Some(name2)) => {
+                    compare_names(interner.get_str(name1), interner.get_str(name2))
+                }
                 (Some(_), None) => Ordering::Greater,
                 (None, Some(_)) => Ordering::Less,
                 (None, None) => Ordering::Equal,
@@ -360,7 +367,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
 
         match cx.shared.module_sorting {
             ModuleSorting::Alphabetical => {
-                not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, tcx));
+                Symbol::with_interner(|interner| {
+                    not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, interner, tcx));
+                });
             }
             ModuleSorting::DeclarationOrder => {}
         }
diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs
index 3c9be29ccc3..6f1765a598b 100644
--- a/src/librustdoc/html/render/search_index.rs
+++ b/src/librustdoc/html/render/search_index.rs
@@ -105,12 +105,16 @@ pub(crate) fn build_index(
     let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();
 
     // Sort search index items. This improves the compressibility of the search index.
-    cache.search_index.sort_unstable_by(|k1, k2| {
-        // `sort_unstable_by_key` produces lifetime errors
-        // HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go away, too
-        let k1 = (&k1.path, k1.name.as_str(), &k1.ty, k1.parent.map(|id| (id.index, id.krate)));
-        let k2 = (&k2.path, k2.name.as_str(), &k2.ty, k2.parent.map(|id| (id.index, id.krate)));
-        Ord::cmp(&k1, &k2)
+    Symbol::with_interner(|inner| {
+        cache.search_index.sort_unstable_by(|k1, k2| {
+            // `sort_unstable_by_key` produces lifetime errors
+            // HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go
+            // away, too
+            let pair = |k: &IndexItem| k.parent.map(|id| (id.index, id.krate));
+            let k1 = (&k1.path, inner.get_str(k1.name), &k1.ty, pair(k1));
+            let k2 = (&k2.path, inner.get_str(k2.name), &k2.ty, pair(k2));
+            Ord::cmp(&k1, &k2)
+        });
     });
 
     // Set up alias indexes.
diff --git a/src/librustdoc/html/url_parts_builder.rs b/src/librustdoc/html/url_parts_builder.rs
index 705fa498e8d..09368d69e45 100644
--- a/src/librustdoc/html/url_parts_builder.rs
+++ b/src/librustdoc/html/url_parts_builder.rs
@@ -146,22 +146,17 @@ impl<'a> Extend<&'a str> for UrlPartsBuilder {
 
 impl FromIterator<Symbol> for UrlPartsBuilder {
     fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
-        // This code has to be duplicated from the `&str` impl because of
-        // `Symbol::as_str`'s lifetimes.
-        let iter = iter.into_iter();
-        let mut builder = Self::with_capacity_bytes(AVG_PART_LENGTH * iter.size_hint().0);
-        iter.for_each(|part| builder.push(part.as_str()));
-        builder
+        Symbol::with_interner(|interner| {
+            UrlPartsBuilder::from_iter(iter.into_iter().map(|sym| interner.get_str(sym)))
+        })
     }
 }
 
 impl Extend<Symbol> for UrlPartsBuilder {
     fn extend<T: IntoIterator<Item = Symbol>>(&mut self, iter: T) {
-        // This code has to be duplicated from the `&str` impl because of
-        // `Symbol::as_str`'s lifetimes.
-        let iter = iter.into_iter();
-        self.buf.reserve(AVG_PART_LENGTH * iter.size_hint().0);
-        iter.for_each(|part| self.push(part.as_str()));
+        Symbol::with_interner(|interner| {
+            self.extend(iter.into_iter().map(|sym| interner.get_str(sym)))
+        })
     }
 }