diff options
| author | Michael Howell <michael@notriddle.com> | 2022-06-24 17:12:58 -0700 |
|---|---|---|
| committer | Michael Howell <michael@notriddle.com> | 2022-06-24 18:16:33 -0700 |
| commit | dc1fc08dc9a6957a79cc24edde4dc016debf3fa7 (patch) | |
| tree | 60b67aa8501941b18e35679911283cfa07d8aea3 /src/librustdoc/html/render | |
| parent | fdca237d5194bf8a1c9b437ebd2114d1c2ba6195 (diff) | |
| download | rust-dc1fc08dc9a6957a79cc24edde4dc016debf3fa7.tar.gz rust-dc1fc08dc9a6957a79cc24edde4dc016debf3fa7.zip | |
rustdoc: reference function signature types from the `p` array
This reduces the size of the function signature index, because
it's common to have many functions that operate on the same types.
$ wc -c search-index-old.js search-index-new.js
5224374 search-index-old.js
3932314 search-index-new.js
By my math, this reduces the uncompressed size of the search index by 32%.
On compressed signatures, the wins are less drastic, a mere 8%:
$ wc -c search-index-old.js.gz search-index-new.js.gz
404532 search-index-old.js.gz
371635 search-index-new.js.gz
Diffstat (limited to 'src/librustdoc/html/render')
| -rw-r--r-- | src/librustdoc/html/render/mod.rs | 81 | ||||
| -rw-r--r-- | src/librustdoc/html/render/search_index.rs | 153 |
2 files changed, 146 insertions, 88 deletions
diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 3f426ee93e7..b01afa02b8b 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -110,63 +110,71 @@ pub(crate) struct IndexItem { /// A type used for the search index. #[derive(Debug)] pub(crate) struct RenderType { - name: Option<String>, - generics: Option<Vec<TypeWithKind>>, + id: Option<RenderTypeId>, + generics: Option<Vec<RenderType>>, } -/// Full type of functions/methods in the search index. -#[derive(Debug)] -pub(crate) struct IndexItemFunctionType { - inputs: Vec<TypeWithKind>, - output: Vec<TypeWithKind>, -} - -impl Serialize for IndexItemFunctionType { +impl Serialize for RenderType { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { - // If we couldn't figure out a type, just write `null`. - let has_missing = self.inputs.iter().chain(self.output.iter()).any(|i| i.ty.name.is_none()); - if has_missing { - serializer.serialize_none() - } else { + let id = match &self.id { + // 0 is a sentinel, everything else is one-indexed + None => 0, + Some(RenderTypeId::Index(idx)) => idx + 1, + _ => panic!("must convert render types to indexes before serializing"), + }; + if let Some(generics) = &self.generics { let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&self.inputs)?; - match self.output.as_slice() { - [] => {} - [one] => seq.serialize_element(one)?, - all => seq.serialize_element(all)?, - } + seq.serialize_element(&id)?; + seq.serialize_element(generics)?; seq.end() + } else { + id.serialize(serializer) } } } -#[derive(Debug)] -pub(crate) struct TypeWithKind { - ty: RenderType, - kind: ItemType, +#[derive(Clone, Debug)] +pub(crate) enum RenderTypeId { + DefId(DefId), + Primitive(clean::PrimitiveType), + Index(usize), } -impl From<(RenderType, ItemType)> for TypeWithKind { - fn from(x: (RenderType, ItemType)) -> TypeWithKind { - TypeWithKind { ty: x.0, kind: x.1 } - } +/// Full type of functions/methods in the search index. +#[derive(Debug)] +pub(crate) struct IndexItemFunctionType { + inputs: Vec<RenderType>, + output: Vec<RenderType>, } -impl Serialize for TypeWithKind { +impl Serialize for IndexItemFunctionType { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { - let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&self.ty.name)?; - seq.serialize_element(&self.kind)?; - if let Some(generics) = &self.ty.generics { - seq.serialize_element(generics)?; + // If we couldn't figure out a type, just write `0`. + let has_missing = self + .inputs + .iter() + .chain(self.output.iter()) + .any(|i| i.id.is_none() && i.generics.is_none()); + if has_missing { + 0.serialize(serializer) + } else { + let mut seq = serializer.serialize_seq(None)?; + match &self.inputs[..] { + [one] if one.generics.is_none() => seq.serialize_element(one)?, + _ => seq.serialize_element(&self.inputs)?, + } + match &self.output[..] { + [one] if one.generics.is_none() => seq.serialize_element(one)?, + _ => seq.serialize_element(&self.output)?, + } + seq.end() } - seq.end() } } @@ -2517,7 +2525,6 @@ fn item_ty_to_section(ty: ItemType) -> ItemSection { ItemType::ProcAttribute => ItemSection::AttributeMacros, ItemType::ProcDerive => ItemSection::DeriveMacros, ItemType::TraitAlias => ItemSection::TraitAliases, - ItemType::Generic => unreachable!(), } } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 9f302cc2566..6520dfc8ab1 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -3,7 +3,8 @@ use std::collections::BTreeMap; use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::TyCtxt; -use rustc_span::symbol::{kw, Symbol}; +use rustc_span::def_id::DefId; +use rustc_span::symbol::Symbol; use serde::ser::{Serialize, SerializeStruct, Serializer}; use crate::clean; @@ -12,7 +13,7 @@ use crate::formats::cache::{Cache, OrphanImplItem}; use crate::formats::item_type::ItemType; use crate::html::format::join_with_double_colon; use crate::html::markdown::short_markdown_summary; -use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, TypeWithKind}; +use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, RenderTypeId}; /// Builds the search index from the collected metadata pub(crate) fn build_index<'tcx>( @@ -48,14 +49,12 @@ pub(crate) fn build_index<'tcx>( .doc_value() .map_or_else(String::new, |s| short_markdown_summary(&s, &krate.module.link_names(cache))); - let Cache { ref mut search_index, ref paths, .. } = *cache; - // Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias, // we need the alias element to have an array of items. let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new(); // Sort search index items. This improves the compressibility of the search index. - search_index.sort_unstable_by(|k1, k2| { + cache.search_index.sort_unstable_by(|k1, k2| { // `sort_unstable_by_key` produces lifetime errors let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent); let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent); @@ -63,7 +62,7 @@ pub(crate) fn build_index<'tcx>( }); // Set up alias indexes. - for (i, item) in search_index.iter().enumerate() { + for (i, item) in cache.search_index.iter().enumerate() { for alias in &item.aliases[..] { aliases.entry(alias.as_str().to_lowercase()).or_default().push(i); } @@ -74,6 +73,79 @@ pub(crate) fn build_index<'tcx>( let mut lastpath = ""; let mut lastpathid = 0usize; + // First, on function signatures + let mut search_index = std::mem::replace(&mut cache.search_index, Vec::new()); + for item in search_index.iter_mut() { + fn convert_render_type( + ty: &mut RenderType, + cache: &mut Cache, + defid_to_pathid: &mut FxHashMap<DefId, usize>, + lastpathid: &mut usize, + crate_paths: &mut Vec<(ItemType, Symbol)>, + ) { + if let Some(generics) = &mut ty.generics { + for item in generics { + convert_render_type(item, cache, defid_to_pathid, lastpathid, crate_paths); + } + } + let Cache { ref paths, ref external_paths, .. } = *cache; + let Some(id) = ty.id.clone() else { + assert!(ty.generics.is_some()); + return; + }; + let (defid, path, item_type) = match id { + RenderTypeId::DefId(defid) => { + if let Some(&(ref fqp, item_type)) = + paths.get(&defid).or_else(|| external_paths.get(&defid)) + { + (defid, *fqp.last().unwrap(), item_type) + } else { + ty.id = None; + return; + } + } + RenderTypeId::Primitive(primitive) => { + let defid = *cache.primitive_locations.get(&primitive).unwrap(); + (defid, primitive.as_sym(), ItemType::Primitive) + } + RenderTypeId::Index(_) => return, + }; + match defid_to_pathid.entry(defid) { + Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())), + Entry::Vacant(entry) => { + let pathid = *lastpathid; + entry.insert(pathid); + *lastpathid += 1; + crate_paths.push((item_type, path)); + ty.id = Some(RenderTypeId::Index(pathid)); + } + } + } + if let Some(search_type) = &mut item.search_type { + for item in &mut search_type.inputs { + convert_render_type( + item, + cache, + &mut defid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + for item in &mut search_type.output { + convert_render_type( + item, + cache, + &mut defid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + } + } + + let Cache { ref paths, .. } = *cache; + + // Then, on parent modules let crate_items: Vec<&IndexItem> = search_index .iter_mut() .map(|item| { @@ -151,6 +223,7 @@ pub(crate) fn build_index<'tcx>( "`{}` is missing idx", item.name ); + // 0 is a sentinel, everything else is one-indexed item.parent_idx.map(|x| x + 1).unwrap_or(0) }) .collect::<Vec<_>>(), @@ -202,36 +275,33 @@ pub(crate) fn get_function_type_for_search<'tcx>( _ => return None, }; - inputs.retain(|a| a.ty.name.is_some()); - output.retain(|a| a.ty.name.is_some()); + inputs.retain(|a| a.id.is_some() || a.generics.is_some()); + output.retain(|a| a.id.is_some() || a.generics.is_some()); Some(IndexItemFunctionType { inputs, output }) } -fn get_index_type(clean_type: &clean::Type, generics: Vec<TypeWithKind>) -> RenderType { +fn get_index_type(clean_type: &clean::Type, generics: Vec<RenderType>) -> RenderType { RenderType { - name: get_index_type_name(clean_type).map(|s| s.as_str().to_ascii_lowercase()), + id: get_index_type_id(clean_type), generics: if generics.is_empty() { None } else { Some(generics) }, } } -fn get_index_type_name(clean_type: &clean::Type) -> Option<Symbol> { +fn get_index_type_id(clean_type: &clean::Type) -> Option<RenderTypeId> { match *clean_type { - clean::Type::Path { ref path, .. } => { - let path_segment = path.segments.last().unwrap(); - Some(path_segment.name) - } + clean::Type::Path { ref path, .. } => Some(RenderTypeId::DefId(path.def_id())), clean::DynTrait(ref bounds, _) => { let path = &bounds[0].trait_; - Some(path.segments.last().unwrap().name) + Some(RenderTypeId::DefId(path.def_id())) } - // We return an empty name because we don't care about the generic name itself. - clean::Generic(_) | clean::ImplTrait(_) => Some(kw::Empty), - clean::Primitive(ref p) => Some(p.as_sym()), + clean::Primitive(p) => Some(RenderTypeId::Primitive(p)), clean::BorrowedRef { ref type_, .. } | clean::RawPointer(_, ref type_) => { - get_index_type_name(type_) + get_index_type_id(type_) } clean::BareFunction(_) + | clean::Generic(_) + | clean::ImplTrait(_) | clean::Tuple(_) | clean::Slice(_) | clean::Array(_, _) @@ -254,16 +324,10 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( arg: &'a Type, tcx: TyCtxt<'tcx>, recurse: usize, - res: &mut Vec<TypeWithKind>, + res: &mut Vec<RenderType>, cache: &Cache, ) { - fn insert_ty( - res: &mut Vec<TypeWithKind>, - tcx: TyCtxt<'_>, - ty: Type, - mut generics: Vec<TypeWithKind>, - cache: &Cache, - ) { + fn insert_ty(res: &mut Vec<RenderType>, ty: Type, mut generics: Vec<RenderType>) { // generics and impl trait are both identified by their generics, // rather than a type name itself let anonymous = ty.is_full_generic() || ty.is_impl_trait(); @@ -316,20 +380,11 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( return; } } - let mut index_ty = get_index_type(&ty, generics); - if index_ty.name.as_ref().map(|s| s.is_empty() && generics_empty).unwrap_or(true) { + let index_ty = get_index_type(&ty, generics); + if index_ty.id.is_none() && generics_empty { return; } - if anonymous { - // We remove the name of the full generic because we have no use for it. - index_ty.name = Some(String::new()); - res.push(TypeWithKind::from((index_ty, ItemType::Generic))); - } else if let Some(kind) = ty.def_id(cache).map(|did| tcx.def_kind(did).into()) { - res.push(TypeWithKind::from((index_ty, kind))); - } else if ty.is_primitive() { - // This is a primitive, let's store it as such. - res.push(TypeWithKind::from((index_ty, ItemType::Primitive))); - } + res.push(index_ty); } if recurse >= 10 { @@ -379,7 +434,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( } } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } // Otherwise we check if the trait bounds are "inlined" like `T: Option<u32>`... if let Some(bound) = generics.params.iter().find(|g| g.is_type() && g.name == arg_s) { @@ -398,7 +453,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } } else if let Type::ImplTrait(ref bounds) = *arg { let mut ty_generics = Vec::new(); @@ -416,7 +471,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } else { // This is not a type parameter. So for example if we have `T, U: Option<T>`, and we're // looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't. @@ -437,7 +492,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } } @@ -450,7 +505,7 @@ fn get_fn_inputs_and_outputs<'tcx>( tcx: TyCtxt<'tcx>, impl_generics: Option<&(clean::Type, clean::Generics)>, cache: &Cache, -) -> (Vec<TypeWithKind>, Vec<TypeWithKind>) { +) -> (Vec<RenderType>, Vec<RenderType>) { let decl = &func.decl; let combined_generics; @@ -478,9 +533,7 @@ fn get_fn_inputs_and_outputs<'tcx>( if !args.is_empty() { all_types.extend(args); } else { - if let Some(kind) = arg.type_.def_id(cache).map(|did| tcx.def_kind(did).into()) { - all_types.push(TypeWithKind::from((get_index_type(&arg.type_, vec![]), kind))); - } + all_types.push(get_index_type(&arg.type_, vec![])); } } @@ -497,9 +550,7 @@ fn get_fn_inputs_and_outputs<'tcx>( cache, ); if ret_types.is_empty() { - if let Some(kind) = return_type.def_id(cache).map(|did| tcx.def_kind(did).into()) { - ret_types.push(TypeWithKind::from((get_index_type(return_type, vec![]), kind))); - } + ret_types.push(get_index_type(return_type, vec![])); } } _ => {} |
