diff options
| author | bors <bors@rust-lang.org> | 2025-07-22 08:52:53 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-07-22 08:52:53 +0000 |
| commit | 1a8eaa852507b134fcd0557547b35308273b4b74 (patch) | |
| tree | 5656ed7794f557ce02b603c2aebafeeef5e4c31a /compiler | |
| parent | 9748d87dc70a9a6725c5dbd76ce29d04752b4f90 (diff) | |
| parent | 8a5bcdde9d8668f92bd2f323898d5da1bdc5df5b (diff) | |
| download | rust-try.tar.gz rust-try.zip | |
Auto merge of #144287 - nnethercote:Symbol-with_interner, r=<try> try
Introduce `Symbol::with_interner`. It lets you get the contents of multiple symbols with a single TLS lookup and interner lock, instead of one per symbol. r? `@ghost`
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast/src/ast.rs | 40 | ||||
| -rw-r--r-- | compiler/rustc_macros/src/symbols.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_resolve/src/rustdoc.rs | 40 | ||||
| -rw-r--r-- | compiler/rustc_span/src/symbol.rs | 46 |
4 files changed, 82 insertions, 46 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 8c2b521c560..53969e731e2 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -167,25 +167,27 @@ impl Path { /// /// Panics if `path` is empty or a segment after the first is `kw::PathRoot`. pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String { - // This is a guess at the needed capacity that works well in practice. It is slightly faster - // than (a) starting with an empty string, or (b) computing the exact capacity required. - // `8` works well because it's about the right size and jemalloc's size classes are all - // multiples of 8. - let mut iter = path.into_iter(); - let len_hint = iter.size_hint().1.unwrap_or(1); - let mut s = String::with_capacity(len_hint * 8); - - let first_sym = *iter.next().unwrap().borrow(); - if first_sym != kw::PathRoot { - s.push_str(first_sym.as_str()); - } - for sym in iter { - let sym = *sym.borrow(); - debug_assert_ne!(sym, kw::PathRoot); - s.push_str("::"); - s.push_str(sym.as_str()); - } - s + Symbol::with_interner(|interner| { + // This is a guess at the needed capacity that works well in practice. It is slightly + // faster than (a) starting with an empty string, or (b) computing the exact capacity + // required. `8` works well because it's about the right size and jemalloc's size classes + // are all multiples of 8. + let mut iter = path.into_iter(); + let len_hint = iter.size_hint().1.unwrap_or(1); + + let mut s = String::with_capacity(len_hint * 8); + let first_sym = *iter.next().unwrap().borrow(); + if first_sym != kw::PathRoot { + s.push_str(interner.get_str(first_sym)); + } + for sym in iter { + let sym = *sym.borrow(); + debug_assert_ne!(sym, kw::PathRoot); + s.push_str("::"); + s.push_str(interner.get_str(sym)); + } + s + }) } /// Like `join_path_syms`, but for `Ident`s. This function is necessary because diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 78a4d47ca33..455324430ef 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) { const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base; /// The number of predefined symbols; this is the first index for - /// extra pre-interned symbols in an Interner created via + /// extra pre-interned symbols in an interner created via /// [`Interner::with_extra_symbols`]. pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count; diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 24e15ded94f..49920bdd913 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) { // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4 // (5 - 1) whitespaces. - let Some(min_indent) = docs - .iter() - .map(|fragment| { - fragment - .doc - .as_str() - .lines() - .filter(|line| line.chars().any(|c| !c.is_whitespace())) - .map(|line| { - // Compare against either space or tab, ignoring whether they are - // mixed or not. - let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); - whitespace - + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }) + let Some(min_indent) = ({ + Symbol::with_interner(|interner| { + docs.iter() + .map(|fragment| { + interner + .get_str(fragment.doc) + .lines() + .filter(|line| line.chars().any(|c| !c.is_whitespace())) + .map(|line| { + // Compare against either space or tab, ignoring whether they are + // mixed or not. + let whitespace = + line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); + whitespace + + (if fragment.kind == DocFragmentKind::SugaredDoc { + 0 + } else { + add + }) + }) + .min() + .unwrap_or(usize::MAX) }) .min() - .unwrap_or(usize::MAX) }) - .min() - else { + }) else { return; }; diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d54175548e3..f84ffab23cd 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2632,6 +2632,19 @@ impl Symbol { }) } + /// Runs `f` with access to the symbol interner, so you can call + /// `interner.get_str(sym)` instead of `sym.as_str()`. + /// + /// This is for performance: it lets you get the contents of multiple + /// symbols with a single TLS lookup and interner lock operation, instead + /// of doing those operations once per symbol. + pub fn with_interner<R>(f: impl FnOnce(&InternerInner) -> R) -> R { + with_session_globals(|session_globals| { + let inner = session_globals.symbol_interner.0.lock(); + f(&inner) + }) + } + pub fn as_u32(self) -> u32 { self.0.as_u32() } @@ -2733,14 +2746,13 @@ impl<CTX> HashStable<CTX> for ByteSymbol { // string with identical contents (e.g. "foo" and b"foo") are both interned, // only one copy will be stored and the resulting `Symbol` and `ByteSymbol` // will have the same index. +// +// There must only be one of these, otherwise its easy to mix up symbols +// between interners. pub(crate) struct Interner(Lock<InternerInner>); // The `&'static [u8]`s in this type actually point into the arena. -// -// This type is private to prevent accidentally constructing more than one -// `Interner` on the same thread, which makes it easy to mix up `Symbol`s -// between `Interner`s. -struct InternerInner { +pub struct InternerInner { arena: DroplessArena, byte_strs: FxIndexSet<&'static [u8]>, } @@ -2794,8 +2806,10 @@ impl Interner { /// Get the symbol as a string. /// /// [`Symbol::as_str()`] should be used in preference to this function. + /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]). fn get_str(&self, symbol: Symbol) -> &str { - let byte_str = self.get_inner(symbol.0.as_usize()); + let inner = self.0.lock(); + let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap(); // SAFETY: known to be a UTF8 string because it's a `Symbol`. unsafe { str::from_utf8_unchecked(byte_str) } } @@ -2803,12 +2817,26 @@ impl Interner { /// Get the symbol as a string. /// /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function. + /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]). fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { - self.get_inner(symbol.0.as_usize()) + let inner = self.0.lock(); + inner.byte_strs.get_index(symbol.0.as_usize()).unwrap() } +} - fn get_inner(&self, index: usize) -> &[u8] { - self.0.lock().byte_strs.get_index(index).unwrap() +impl InternerInner { + /// Get the symbol as a string. Used with `with_interner`. + #[inline] + pub fn get_str(&self, symbol: Symbol) -> &str { + let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap(); + // SAFETY: known to be a UTF8 string because it's a `Symbol`. + unsafe { str::from_utf8_unchecked(byte_str) } + } + + /// Get the symbol as a string. Used with `with_interner`. + #[inline] + pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { + self.byte_strs.get_index(symbol.0.as_usize()).unwrap() } } |
