diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2025-05-27 15:51:47 +1000 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2025-07-17 08:37:19 +1000 |
| commit | fb7aa9e4fdb88a4833274303899b9801ef924100 (patch) | |
| tree | a233e82235ba815b323bf8d4626b8ed9b05e7de7 /compiler | |
| parent | 5795086bdfe7ed988aa53a110bd0692c33d8755b (diff) | |
| download | rust-fb7aa9e4fdb88a4833274303899b9801ef924100.tar.gz rust-fb7aa9e4fdb88a4833274303899b9801ef924100.zip | |
Improve path segment joining.
There are many places that join path segments with `::` to produce a
string. A lot of these use `join("::")`. Many in rustdoc use
`join_with_double_colon`, and a few use `.joined("..")`. One in Clippy
uses `itertools::join`. A couple of them look for `kw::PathRoot` in the
first segment, which can be important.
This commit introduces `rustc_ast::join_path_{syms,ident}` to do the
joining for everyone. `rustc_ast` is as good a location for these as
any, being the earliest-running of the several crates with a `Path`
type. Two functions are needed because `Ident` printing is more complex
than simple `Symbol` printing.
The commit also removes `join_with_double_colon`, and
`estimate_item_path_byte_length` with it.
There are still a handful of places that join strings with "::" that are
unchanged. They are not that important: some of them are in tests, and
some of them first split a path around "::" and then rejoin with "::".
This fixes one test case where `{{root}}` shows up in an error message.
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_ast/src/ast.rs | 55 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/source_util.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_builtin_macros/src/test.rs | 9 | ||||
| -rw-r--r-- | compiler/rustc_hir/src/hir.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_hir_typeck/src/method/prelude_edition_lints.rs | 11 | ||||
| -rw-r--r-- | compiler/rustc_passes/src/check_attr.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_resolve/src/diagnostics.rs | 29 | ||||
| -rw-r--r-- | compiler/rustc_resolve/src/rustdoc.rs | 10 |
8 files changed, 85 insertions, 43 deletions
diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 3c576316f62..fa542a810dc 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -18,7 +18,7 @@ //! - [`Attribute`]: Metadata associated with item. //! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators. -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; use std::{cmp, fmt}; pub use GenericArgs::*; @@ -155,6 +155,59 @@ impl Path { } } +/// Joins multiple symbols with "::" into a path, e.g. "a::b::c". If the first +/// segment is `kw::PathRoot` it will be printed as empty, e.g. "::b::c". +/// +/// The generics on the `path` argument mean it can accept many forms, such as: +/// - `&[Symbol]` +/// - `Vec<Symbol>` +/// - `Vec<&Symbol>` +/// - `impl Iterator<Item = Symbol>` +/// - `impl Iterator<Item = &Symbol>` +/// +/// Panics if `path` is empty or a segment after the first is `kw::PathRoot`. +pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String { + // This is a guess at the needed capacity that works well in practice. It is slightly faster + // than (a) starting with an empty string, or (b) computing the exact capacity required. + // `8` works well because it's about the right size and jemalloc's size classes are all + // multiples of 8. + let mut iter = path.into_iter(); + let len_hint = iter.size_hint().1.unwrap_or(1); + let mut s = String::with_capacity(len_hint * 8); + + let first_sym = *iter.next().unwrap().borrow(); + if first_sym != kw::PathRoot { + s.push_str(first_sym.as_str()); + } + for sym in iter { + let sym = *sym.borrow(); + debug_assert_ne!(sym, kw::PathRoot); + s.push_str("::"); + s.push_str(sym.as_str()); + } + s +} + +/// Like `join_path_syms`, but for `Ident`s. This function is necessary because +/// `Ident::to_string` does more than just print the symbol in the `name` field. +pub fn join_path_idents(path: impl IntoIterator<Item = impl Borrow<Ident>>) -> String { + let mut iter = path.into_iter(); + let len_hint = iter.size_hint().1.unwrap_or(1); + let mut s = String::with_capacity(len_hint * 8); + + let first_ident = *iter.next().unwrap().borrow(); + if first_ident.name != kw::PathRoot { + s.push_str(&first_ident.to_string()); + } + for ident in iter { + let ident = *ident.borrow(); + debug_assert_ne!(ident.name, kw::PathRoot); + s.push_str("::"); + s.push_str(&ident.to_string()); + } + s +} + /// A segment of a path: an identifier, an optional lifetime, and a set of types. /// /// E.g., `std`, `String` or `Box<T>`. diff --git a/compiler/rustc_builtin_macros/src/source_util.rs b/compiler/rustc_builtin_macros/src/source_util.rs index cebfffa1e16..ecfd46a84ec 100644 --- a/compiler/rustc_builtin_macros/src/source_util.rs +++ b/compiler/rustc_builtin_macros/src/source_util.rs @@ -4,8 +4,8 @@ use std::sync::Arc; use rustc_ast as ast; use rustc_ast::ptr::P; -use rustc_ast::token; use rustc_ast::tokenstream::TokenStream; +use rustc_ast::{join_path_idents, token}; use rustc_ast_pretty::pprust; use rustc_expand::base::{ DummyResult, ExpandResult, ExtCtxt, MacEager, MacResult, MacroExpanderResult, resolve_path, @@ -100,7 +100,7 @@ pub(crate) fn expand_mod( let sp = cx.with_def_site_ctxt(sp); check_zero_tts(cx, sp, tts, "module_path!"); let mod_path = &cx.current_expansion.module.mod_path; - let string = mod_path.iter().map(|x| x.to_string()).collect::<Vec<String>>().join("::"); + let string = join_path_idents(mod_path); ExpandResult::Ready(MacEager::expr(cx.expr_str(sp, Symbol::intern(&string)))) } diff --git a/compiler/rustc_builtin_macros/src/test.rs b/compiler/rustc_builtin_macros/src/test.rs index b067578794b..ba3d8368b2a 100644 --- a/compiler/rustc_builtin_macros/src/test.rs +++ b/compiler/rustc_builtin_macros/src/test.rs @@ -5,7 +5,7 @@ use std::assert_matches::assert_matches; use std::iter; use rustc_ast::ptr::P; -use rustc_ast::{self as ast, GenericParamKind, attr}; +use rustc_ast::{self as ast, GenericParamKind, attr, join_path_idents}; use rustc_ast_pretty::pprust; use rustc_errors::{Applicability, Diag, Level}; use rustc_expand::base::*; @@ -446,12 +446,7 @@ fn get_location_info(cx: &ExtCtxt<'_>, fn_: &ast::Fn) -> (Symbol, usize, usize, } fn item_path(mod_path: &[Ident], item_ident: &Ident) -> String { - mod_path - .iter() - .chain(iter::once(item_ident)) - .map(|x| x.to_string()) - .collect::<Vec<String>>() - .join("::") + join_path_idents(mod_path.iter().chain(iter::once(item_ident))) } enum ShouldPanic { diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 159518a8d87..ace2259aec3 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -7,7 +7,7 @@ use rustc_ast::token::CommentKind; use rustc_ast::util::parser::ExprPrecedence; use rustc_ast::{ self as ast, FloatTy, InlineAsmOptions, InlineAsmTemplatePiece, IntTy, Label, LitIntType, - LitKind, TraitObjectSyntax, UintTy, UnsafeBinderCastKind, + LitKind, TraitObjectSyntax, UintTy, UnsafeBinderCastKind, join_path_idents, }; pub use rustc_ast::{ AssignOp, AssignOpKind, AttrId, AttrStyle, BinOp, BinOpKind, BindingMode, BorrowKind, @@ -1168,7 +1168,7 @@ impl AttrPath { impl fmt::Display for AttrPath { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.segments.iter().map(|i| i.to_string()).collect::<Vec<_>>().join("::")) + write!(f, "{}", join_path_idents(&self.segments)) } } diff --git a/compiler/rustc_hir_typeck/src/method/prelude_edition_lints.rs b/compiler/rustc_hir_typeck/src/method/prelude_edition_lints.rs index 0037d5ac042..38413cca633 100644 --- a/compiler/rustc_hir_typeck/src/method/prelude_edition_lints.rs +++ b/compiler/rustc_hir_typeck/src/method/prelude_edition_lints.rs @@ -2,6 +2,7 @@ use std::fmt::Write; use hir::def_id::DefId; use hir::{HirId, ItemKind}; +use rustc_ast::join_path_idents; use rustc_errors::Applicability; use rustc_hir as hir; use rustc_lint::{ARRAY_INTO_ITER, BOXED_SLICE_INTO_ITER}; @@ -383,13 +384,9 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // All that is left is `_`! We need to use the full path. It doesn't matter which one we // pick, so just take the first one. match import_items[0].kind { - ItemKind::Use(path, _) => Some( - path.segments - .iter() - .map(|segment| segment.ident.to_string()) - .collect::<Vec<_>>() - .join("::"), - ), + ItemKind::Use(path, _) => { + Some(join_path_idents(path.segments.iter().map(|seg| seg.ident))) + } _ => { span_bug!(span, "unexpected item kind, expected a use: {:?}", import_items[0].kind); } diff --git a/compiler/rustc_passes/src/check_attr.rs b/compiler/rustc_passes/src/check_attr.rs index 2009ddc1e2d..5b2b9c152aa 100644 --- a/compiler/rustc_passes/src/check_attr.rs +++ b/compiler/rustc_passes/src/check_attr.rs @@ -10,7 +10,7 @@ use std::collections::hash_map::Entry; use std::slice; use rustc_abi::{Align, ExternAbi, Size}; -use rustc_ast::{AttrStyle, LitKind, MetaItemInner, MetaItemKind, ast}; +use rustc_ast::{AttrStyle, LitKind, MetaItemInner, MetaItemKind, ast, join_path_syms}; use rustc_attr_data_structures::{AttributeKind, InlineAttr, ReprAttr, find_attr}; use rustc_attr_parsing::{AttributeParser, Late}; use rustc_data_structures::fx::FxHashMap; @@ -678,9 +678,7 @@ impl<'tcx> CheckAttrVisitor<'tcx> { allowed_target: Target, ) { if target != allowed_target { - let path = attr.path(); - let path: Vec<_> = path.iter().map(|s| s.as_str()).collect(); - let attr_name = path.join("::"); + let attr_name = join_path_syms(attr.path()); self.tcx.emit_node_span_lint( UNUSED_ATTRIBUTES, hir_id, diff --git a/compiler/rustc_resolve/src/diagnostics.rs b/compiler/rustc_resolve/src/diagnostics.rs index 11e4a729ae2..5a162b5d2b9 100644 --- a/compiler/rustc_resolve/src/diagnostics.rs +++ b/compiler/rustc_resolve/src/diagnostics.rs @@ -1,6 +1,8 @@ use rustc_ast::ptr::P; use rustc_ast::visit::{self, Visitor}; -use rustc_ast::{self as ast, CRATE_NODE_ID, Crate, ItemKind, ModKind, NodeId, Path}; +use rustc_ast::{ + self as ast, CRATE_NODE_ID, Crate, ItemKind, ModKind, NodeId, Path, join_path_idents, +}; use rustc_ast_pretty::pprust; use rustc_attr_data_structures::{ self as attr, AttributeKind, CfgEntry, Stability, StrippedCfgItem, find_attr, @@ -2018,7 +2020,7 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } } - let mut sugg_paths = vec![]; + let mut sugg_paths: Vec<(Vec<Ident>, bool)> = vec![]; if let Some(mut def_id) = res.opt_def_id() { // We can't use `def_path_str` in resolve. let mut path = vec![def_id]; @@ -2031,16 +2033,16 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } } // We will only suggest importing directly if it is accessible through that path. - let path_names: Option<Vec<String>> = path + let path_names: Option<Vec<Ident>> = path .iter() .rev() .map(|def_id| { - self.tcx.opt_item_name(*def_id).map(|n| { - if def_id.is_top_level_module() { - "crate".to_string() + self.tcx.opt_item_name(*def_id).map(|name| { + Ident::with_dummy_span(if def_id.is_top_level_module() { + kw::Crate } else { - n.to_string() - } + name + }) }) }) .collect(); @@ -2084,13 +2086,10 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { match binding.kind { NameBindingKind::Import { import, .. } => { for segment in import.module_path.iter().skip(1) { - path.push(segment.ident.to_string()); + path.push(segment.ident); } sugg_paths.push(( - path.iter() - .cloned() - .chain(vec![ident.to_string()].into_iter()) - .collect::<Vec<_>>(), + path.iter().cloned().chain(std::iter::once(ident)).collect::<Vec<_>>(), true, // re-export )); } @@ -2126,7 +2125,7 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { err.subdiagnostic(note); } // We prioritize shorter paths, non-core imports and direct imports over the alternatives. - sugg_paths.sort_by_key(|(p, reexport)| (p.len(), p[0] == "core", *reexport)); + sugg_paths.sort_by_key(|(p, reexport)| (p.len(), p[0].name == sym::core, *reexport)); for (sugg, reexport) in sugg_paths { if not_publicly_reexported { break; @@ -2136,7 +2135,7 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { // `tests/ui/imports/issue-55884-2.rs` continue; } - let path = sugg.join("::"); + let path = join_path_idents(sugg); let sugg = if reexport { errors::ImportIdent::ThroughReExport { span: dedup_span, ident, path } } else { diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index f61cd1f0adf..24e15ded94f 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -7,6 +7,7 @@ use pulldown_cmark::{ }; use rustc_ast as ast; use rustc_ast::attr::AttributeExt; +use rustc_ast::join_path_syms; use rustc_ast::util::comments::beautify_doc_string; use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::unord::UnordSet; @@ -259,7 +260,7 @@ pub fn main_body_opts() -> Options { | Options::ENABLE_SMART_PUNCTUATION } -fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> { +fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> { let mut stripped_segment = String::new(); let mut param_depth = 0; @@ -284,7 +285,7 @@ fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, Malfor } if param_depth == 0 { - Ok(stripped_segment) + Ok(Symbol::intern(&stripped_segment)) } else { // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>` Err(MalformedGenerics::UnbalancedAngleBrackets) @@ -346,9 +347,8 @@ pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGen debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}"); - let stripped_path = stripped_segments.join("::"); - - if !stripped_path.is_empty() { + if !stripped_segments.is_empty() { + let stripped_path = join_path_syms(stripped_segments); Ok(stripped_path.into()) } else { Err(MalformedGenerics::MissingType) |
