Auto merge of #98475 - notriddle:notriddle/index-fn-signatures, r=GuillaumeGomez

rustdoc: reference function signature types from the `p` array This reduces the size of the function signature index, because it's common to have many functions that operate on the same types. $ wc -c search-index-old.js search-index-new.js 5224374 search-index-old.js 3932314 search-index-new.js By my math, this reduces the uncompressed size of the search index by 32%. On compressed signatures, the wins are less drastic, a mere 8%: $ wc -c search-index-old.js.gz search-index-new.js.gz 404532 search-index-old.js.gz 371635 search-index-new.js.gz
author: bors <bors@rust-lang.org> 2022-06-28 21:40:10 +0000
committer: bors <bors@rust-lang.org> 2022-06-28 21:40:10 +0000
commit: 2953edc7b7a00d14c4ba940ebb46b4e7148a9d71 (patch)
tree: cf271f0749d00431e79bb72c88d7e20127d7964d
parent: 830880640304ba8699c5f9a0c4665c38a3271963 (diff)
parent: 33cf9ea4a2aebb015e81071968659bd51218c5af (diff)
download: rust-2953edc7b7a00d14c4ba940ebb46b4e7148a9d71.tar.gz
rust-2953edc7b7a00d14c4ba940ebb46b4e7148a9d71.zip
7 files changed, 380 insertions, 146 deletions
diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs
index 352803855a4..81aa8c6cf8e 100644
--- a/src/librustdoc/clean/types.rs
+++ b/src/librustdoc/clean/types.rs
@@ -1671,10 +1671,6 @@ impl Type {
         matches!(self, Type::ImplTrait(_))
     }
 
-    pub(crate) fn is_primitive(&self) -> bool {
-        self.primitive_type().is_some()
-    }
-
     pub(crate) fn projection(&self) -> Option<(&Type, DefId, PathSegment)> {
         if let QPath { self_type, trait_, assoc, .. } = self {
             Some((self_type, trait_.def_id(), *assoc.clone()))
diff --git a/src/librustdoc/formats/item_type.rs b/src/librustdoc/formats/item_type.rs
index eca5501cd33..9cb3327d7c7 100644
--- a/src/librustdoc/formats/item_type.rs
+++ b/src/librustdoc/formats/item_type.rs
@@ -48,7 +48,6 @@ pub(crate) enum ItemType {
     ProcAttribute = 23,
     ProcDerive = 24,
     TraitAlias = 25,
-    Generic = 26,
 }
 
 impl Serialize for ItemType {
@@ -175,7 +174,6 @@ impl ItemType {
             ItemType::ProcAttribute => "attr",
             ItemType::ProcDerive => "derive",
             ItemType::TraitAlias => "traitalias",
-            ItemType::Generic => "generic",
         }
     }
 }
diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs
index 3f426ee93e7..1ef41d62e5e 100644
--- a/src/librustdoc/html/render/mod.rs
+++ b/src/librustdoc/html/render/mod.rs
@@ -110,63 +110,72 @@ pub(crate) struct IndexItem {
 /// A type used for the search index.
 #[derive(Debug)]
 pub(crate) struct RenderType {
-    name: Option<String>,
-    generics: Option<Vec<TypeWithKind>>,
+    id: Option<RenderTypeId>,
+    generics: Option<Vec<RenderType>>,
 }
 
-/// Full type of functions/methods in the search index.
-#[derive(Debug)]
-pub(crate) struct IndexItemFunctionType {
-    inputs: Vec<TypeWithKind>,
-    output: Vec<TypeWithKind>,
-}
-
-impl Serialize for IndexItemFunctionType {
+impl Serialize for RenderType {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
         S: Serializer,
     {
-        // If we couldn't figure out a type, just write `null`.
-        let has_missing = self.inputs.iter().chain(self.output.iter()).any(|i| i.ty.name.is_none());
-        if has_missing {
-            serializer.serialize_none()
-        } else {
+        let id = match &self.id {
+            // 0 is a sentinel, everything else is one-indexed
+            None => 0,
+            Some(RenderTypeId::Index(idx)) => idx + 1,
+            _ => panic!("must convert render types to indexes before serializing"),
+        };
+        if let Some(generics) = &self.generics {
             let mut seq = serializer.serialize_seq(None)?;
-            seq.serialize_element(&self.inputs)?;
-            match self.output.as_slice() {
-                [] => {}
-                [one] => seq.serialize_element(one)?,
-                all => seq.serialize_element(all)?,
-            }
+            seq.serialize_element(&id)?;
+            seq.serialize_element(generics)?;
             seq.end()
+        } else {
+            id.serialize(serializer)
         }
     }
 }
 
-#[derive(Debug)]
-pub(crate) struct TypeWithKind {
-    ty: RenderType,
-    kind: ItemType,
+#[derive(Clone, Debug)]
+pub(crate) enum RenderTypeId {
+    DefId(DefId),
+    Primitive(clean::PrimitiveType),
+    Index(usize),
 }
 
-impl From<(RenderType, ItemType)> for TypeWithKind {
-    fn from(x: (RenderType, ItemType)) -> TypeWithKind {
-        TypeWithKind { ty: x.0, kind: x.1 }
-    }
+/// Full type of functions/methods in the search index.
+#[derive(Debug)]
+pub(crate) struct IndexItemFunctionType {
+    inputs: Vec<RenderType>,
+    output: Vec<RenderType>,
 }
 
-impl Serialize for TypeWithKind {
+impl Serialize for IndexItemFunctionType {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
         S: Serializer,
     {
-        let mut seq = serializer.serialize_seq(None)?;
-        seq.serialize_element(&self.ty.name)?;
-        seq.serialize_element(&self.kind)?;
-        if let Some(generics) = &self.ty.generics {
-            seq.serialize_element(generics)?;
+        // If we couldn't figure out a type, just write `0`.
+        let has_missing = self
+            .inputs
+            .iter()
+            .chain(self.output.iter())
+            .any(|i| i.id.is_none() && i.generics.is_none());
+        if has_missing {
+            0.serialize(serializer)
+        } else {
+            let mut seq = serializer.serialize_seq(None)?;
+            match &self.inputs[..] {
+                [one] if one.generics.is_none() => seq.serialize_element(one)?,
+                _ => seq.serialize_element(&self.inputs)?,
+            }
+            match &self.output[..] {
+                [] => {}
+                [one] if one.generics.is_none() => seq.serialize_element(one)?,
+                _ => seq.serialize_element(&self.output)?,
+            }
+            seq.end()
         }
-        seq.end()
     }
 }
 
@@ -2517,7 +2526,6 @@ fn item_ty_to_section(ty: ItemType) -> ItemSection {
         ItemType::ProcAttribute => ItemSection::AttributeMacros,
         ItemType::ProcDerive => ItemSection::DeriveMacros,
         ItemType::TraitAlias => ItemSection::TraitAliases,
-        ItemType::Generic => unreachable!(),
     }
 }
 
diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs
index 9f302cc2566..d672f0bb599 100644
--- a/src/librustdoc/html/render/search_index.rs
+++ b/src/librustdoc/html/render/search_index.rs
@@ -3,16 +3,19 @@ use std::collections::BTreeMap;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_middle::ty::TyCtxt;
-use rustc_span::symbol::{kw, Symbol};
+use rustc_span::def_id::LOCAL_CRATE;
+use rustc_span::symbol::Symbol;
 use serde::ser::{Serialize, SerializeStruct, Serializer};
 
 use crate::clean;
-use crate::clean::types::{FnRetTy, Function, GenericBound, Generics, Type, WherePredicate};
+use crate::clean::types::{
+    FnRetTy, Function, GenericBound, Generics, ItemId, Type, WherePredicate,
+};
 use crate::formats::cache::{Cache, OrphanImplItem};
 use crate::formats::item_type::ItemType;
 use crate::html::format::join_with_double_colon;
 use crate::html::markdown::short_markdown_summary;
-use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, TypeWithKind};
+use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, RenderTypeId};
 
 /// Builds the search index from the collected metadata
 pub(crate) fn build_index<'tcx>(
@@ -20,7 +23,7 @@ pub(crate) fn build_index<'tcx>(
     cache: &mut Cache,
     tcx: TyCtxt<'tcx>,
 ) -> String {
-    let mut defid_to_pathid = FxHashMap::default();
+    let mut itemid_to_pathid = FxHashMap::default();
     let mut crate_paths = vec![];
 
     // Attach all orphan items to the type's definition if the type
@@ -48,14 +51,12 @@ pub(crate) fn build_index<'tcx>(
         .doc_value()
         .map_or_else(String::new, |s| short_markdown_summary(&s, &krate.module.link_names(cache)));
 
-    let Cache { ref mut search_index, ref paths, .. } = *cache;
-
     // Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias,
     // we need the alias element to have an array of items.
     let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();
 
     // Sort search index items. This improves the compressibility of the search index.
-    search_index.sort_unstable_by(|k1, k2| {
+    cache.search_index.sort_unstable_by(|k1, k2| {
         // `sort_unstable_by_key` produces lifetime errors
         let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent);
         let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent);
@@ -63,7 +64,7 @@ pub(crate) fn build_index<'tcx>(
     });
 
     // Set up alias indexes.
-    for (i, item) in search_index.iter().enumerate() {
+    for (i, item) in cache.search_index.iter().enumerate() {
         for alias in &item.aliases[..] {
             aliases.entry(alias.as_str().to_lowercase()).or_default().push(i);
         }
@@ -74,24 +75,99 @@ pub(crate) fn build_index<'tcx>(
     let mut lastpath = "";
     let mut lastpathid = 0usize;
 
+    // First, on function signatures
+    let mut search_index = std::mem::replace(&mut cache.search_index, Vec::new());
+    for item in search_index.iter_mut() {
+        fn convert_render_type(
+            ty: &mut RenderType,
+            cache: &mut Cache,
+            itemid_to_pathid: &mut FxHashMap<ItemId, usize>,
+            lastpathid: &mut usize,
+            crate_paths: &mut Vec<(ItemType, Symbol)>,
+        ) {
+            if let Some(generics) = &mut ty.generics {
+                for item in generics {
+                    convert_render_type(item, cache, itemid_to_pathid, lastpathid, crate_paths);
+                }
+            }
+            let Cache { ref paths, ref external_paths, .. } = *cache;
+            let Some(id) = ty.id.clone() else {
+                assert!(ty.generics.is_some());
+                return;
+            };
+            let (itemid, path, item_type) = match id {
+                RenderTypeId::DefId(defid) => {
+                    if let Some(&(ref fqp, item_type)) =
+                        paths.get(&defid).or_else(|| external_paths.get(&defid))
+                    {
+                        (ItemId::DefId(defid), *fqp.last().unwrap(), item_type)
+                    } else {
+                        ty.id = None;
+                        return;
+                    }
+                }
+                RenderTypeId::Primitive(primitive) => (
+                    ItemId::Primitive(primitive, LOCAL_CRATE),
+                    primitive.as_sym(),
+                    ItemType::Primitive,
+                ),
+                RenderTypeId::Index(_) => return,
+            };
+            match itemid_to_pathid.entry(itemid) {
+                Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())),
+                Entry::Vacant(entry) => {
+                    let pathid = *lastpathid;
+                    entry.insert(pathid);
+                    *lastpathid += 1;
+                    crate_paths.push((item_type, path));
+                    ty.id = Some(RenderTypeId::Index(pathid));
+                }
+            }
+        }
+        if let Some(search_type) = &mut item.search_type {
+            for item in &mut search_type.inputs {
+                convert_render_type(
+                    item,
+                    cache,
+                    &mut itemid_to_pathid,
+                    &mut lastpathid,
+                    &mut crate_paths,
+                );
+            }
+            for item in &mut search_type.output {
+                convert_render_type(
+                    item,
+                    cache,
+                    &mut itemid_to_pathid,
+                    &mut lastpathid,
+                    &mut crate_paths,
+                );
+            }
+        }
+    }
+
+    let Cache { ref paths, .. } = *cache;
+
+    // Then, on parent modules
     let crate_items: Vec<&IndexItem> = search_index
         .iter_mut()
         .map(|item| {
-            item.parent_idx = item.parent.and_then(|defid| match defid_to_pathid.entry(defid) {
-                Entry::Occupied(entry) => Some(*entry.get()),
-                Entry::Vacant(entry) => {
-                    let pathid = lastpathid;
-                    entry.insert(pathid);
-                    lastpathid += 1;
+            item.parent_idx =
+                item.parent.and_then(|defid| match itemid_to_pathid.entry(ItemId::DefId(defid)) {
+                    Entry::Occupied(entry) => Some(*entry.get()),
+                    Entry::Vacant(entry) => {
+                        let pathid = lastpathid;
+                        entry.insert(pathid);
+                        lastpathid += 1;
 
-                    if let Some(&(ref fqp, short)) = paths.get(&defid) {
-                        crate_paths.push((short, *fqp.last().unwrap()));
-                        Some(pathid)
-                    } else {
-                        None
+                        if let Some(&(ref fqp, short)) = paths.get(&defid) {
+                            crate_paths.push((short, *fqp.last().unwrap()));
+                            Some(pathid)
+                        } else {
+                            None
+                        }
                     }
-                }
-            });
+                });
 
             // Omit the parent path if it is same to that of the prior item.
             if lastpath == &item.path {
@@ -151,13 +227,40 @@ pub(crate) fn build_index<'tcx>(
                             "`{}` is missing idx",
                             item.name
                         );
+                        // 0 is a sentinel, everything else is one-indexed
                         item.parent_idx.map(|x| x + 1).unwrap_or(0)
                     })
                     .collect::<Vec<_>>(),
             )?;
             crate_data.serialize_field(
                 "f",
-                &self.items.iter().map(|item| &item.search_type).collect::<Vec<_>>(),
+                &self
+                    .items
+                    .iter()
+                    .map(|item| {
+                        // Fake option to get `0` out as a sentinel instead of `null`.
+                        // We want to use `0` because it's three less bytes.
+                        enum FunctionOption<'a> {
+                            Function(&'a IndexItemFunctionType),
+                            None,
+                        }
+                        impl<'a> Serialize for FunctionOption<'a> {
+                            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+                            where
+                                S: Serializer,
+                            {
+                                match self {
+                                    FunctionOption::None => 0.serialize(serializer),
+                                    FunctionOption::Function(ty) => ty.serialize(serializer),
+                                }
+                            }
+                        }
+                        match &item.search_type {
+                            Some(ty) => FunctionOption::Function(ty),
+                            None => FunctionOption::None,
+                        }
+                    })
+                    .collect::<Vec<_>>(),
             )?;
             crate_data.serialize_field(
                 "p",
@@ -202,36 +305,33 @@ pub(crate) fn get_function_type_for_search<'tcx>(
         _ => return None,
     };
 
-    inputs.retain(|a| a.ty.name.is_some());
-    output.retain(|a| a.ty.name.is_some());
+    inputs.retain(|a| a.id.is_some() || a.generics.is_some());
+    output.retain(|a| a.id.is_some() || a.generics.is_some());
 
     Some(IndexItemFunctionType { inputs, output })
 }
 
-fn get_index_type(clean_type: &clean::Type, generics: Vec<TypeWithKind>) -> RenderType {
+fn get_index_type(clean_type: &clean::Type, generics: Vec<RenderType>) -> RenderType {
     RenderType {
-        name: get_index_type_name(clean_type).map(|s| s.as_str().to_ascii_lowercase()),
+        id: get_index_type_id(clean_type),
         generics: if generics.is_empty() { None } else { Some(generics) },
     }
 }
 
-fn get_index_type_name(clean_type: &clean::Type) -> Option<Symbol> {
+fn get_index_type_id(clean_type: &clean::Type) -> Option<RenderTypeId> {
     match *clean_type {
-        clean::Type::Path { ref path, .. } => {
-            let path_segment = path.segments.last().unwrap();
-            Some(path_segment.name)
-        }
+        clean::Type::Path { ref path, .. } => Some(RenderTypeId::DefId(path.def_id())),
         clean::DynTrait(ref bounds, _) => {
             let path = &bounds[0].trait_;
-            Some(path.segments.last().unwrap().name)
+            Some(RenderTypeId::DefId(path.def_id()))
         }
-        // We return an empty name because we don't care about the generic name itself.
-        clean::Generic(_) | clean::ImplTrait(_) => Some(kw::Empty),
-        clean::Primitive(ref p) => Some(p.as_sym()),
+        clean::Primitive(p) => Some(RenderTypeId::Primitive(p)),
         clean::BorrowedRef { ref type_, .. } | clean::RawPointer(_, ref type_) => {
-            get_index_type_name(type_)
+            get_index_type_id(type_)
         }
         clean::BareFunction(_)
+        | clean::Generic(_)
+        | clean::ImplTrait(_)
         | clean::Tuple(_)
         | clean::Slice(_)
         | clean::Array(_, _)
@@ -254,16 +354,10 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
     arg: &'a Type,
     tcx: TyCtxt<'tcx>,
     recurse: usize,
-    res: &mut Vec<TypeWithKind>,
+    res: &mut Vec<RenderType>,
     cache: &Cache,
 ) {
-    fn insert_ty(
-        res: &mut Vec<TypeWithKind>,
-        tcx: TyCtxt<'_>,
-        ty: Type,
-        mut generics: Vec<TypeWithKind>,
-        cache: &Cache,
-    ) {
+    fn insert_ty(res: &mut Vec<RenderType>, ty: Type, mut generics: Vec<RenderType>) {
         // generics and impl trait are both identified by their generics,
         // rather than a type name itself
         let anonymous = ty.is_full_generic() || ty.is_impl_trait();
@@ -316,20 +410,11 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
                 return;
             }
         }
-        let mut index_ty = get_index_type(&ty, generics);
-        if index_ty.name.as_ref().map(|s| s.is_empty() && generics_empty).unwrap_or(true) {
+        let index_ty = get_index_type(&ty, generics);
+        if index_ty.id.is_none() && generics_empty {
             return;
         }
-        if anonymous {
-            // We remove the name of the full generic because we have no use for it.
-            index_ty.name = Some(String::new());
-            res.push(TypeWithKind::from((index_ty, ItemType::Generic)));
-        } else if let Some(kind) = ty.def_id(cache).map(|did| tcx.def_kind(did).into()) {
-            res.push(TypeWithKind::from((index_ty, kind)));
-        } else if ty.is_primitive() {
-            // This is a primitive, let's store it as such.
-            res.push(TypeWithKind::from((index_ty, ItemType::Primitive)));
-        }
+        res.push(index_ty);
     }
 
     if recurse >= 10 {
@@ -379,7 +464,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
                     }
                 }
             }
-            insert_ty(res, tcx, arg.clone(), ty_generics, cache);
+            insert_ty(res, arg.clone(), ty_generics);
         }
         // Otherwise we check if the trait bounds are "inlined" like `T: Option<u32>`...
         if let Some(bound) = generics.params.iter().find(|g| g.is_type() && g.name == arg_s) {
@@ -398,7 +483,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
                     );
                 }
             }
-            insert_ty(res, tcx, arg.clone(), ty_generics, cache);
+            insert_ty(res, arg.clone(), ty_generics);
         }
     } else if let Type::ImplTrait(ref bounds) = *arg {
         let mut ty_generics = Vec::new();
@@ -416,7 +501,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
                 );
             }
         }
-        insert_ty(res, tcx, arg.clone(), ty_generics, cache);
+        insert_ty(res, arg.clone(), ty_generics);
     } else {
         // This is not a type parameter. So for example if we have `T, U: Option<T>`, and we're
         // looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't.
@@ -437,7 +522,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
                 );
             }
         }
-        insert_ty(res, tcx, arg.clone(), ty_generics, cache);
+        insert_ty(res, arg.clone(), ty_generics);
     }
 }
 
@@ -450,7 +535,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
     tcx: TyCtxt<'tcx>,
     impl_generics: Option<&(clean::Type, clean::Generics)>,
     cache: &Cache,
-) -> (Vec<TypeWithKind>, Vec<TypeWithKind>) {
+) -> (Vec<RenderType>, Vec<RenderType>) {
     let decl = &func.decl;
 
     let combined_generics;
@@ -478,9 +563,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
         if !args.is_empty() {
             all_types.extend(args);
         } else {
-            if let Some(kind) = arg.type_.def_id(cache).map(|did| tcx.def_kind(did).into()) {
-                all_types.push(TypeWithKind::from((get_index_type(&arg.type_, vec![]), kind)));
-            }
+            all_types.push(get_index_type(&arg.type_, vec![]));
         }
     }
 
@@ -497,9 +580,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
                 cache,
             );
             if ret_types.is_empty() {
-                if let Some(kind) = return_type.def_id(cache).map(|did| tcx.def_kind(did).into()) {
-                    ret_types.push(TypeWithKind::from((get_index_type(return_type, vec![]), kind)));
-                }
+                ret_types.push(get_index_type(return_type, vec![]));
             }
         }
         _ => {}
diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js
index defdc20132e..ecbe15a59da 100644
--- a/src/librustdoc/html/static/js/externs.js
+++ b/src/librustdoc/html/static/js/externs.js
@@ -81,3 +81,62 @@ let ResultsTable;
  * }}
  */
 let Results;
+
+/**
+ * A pair of [inputs, outputs], or 0 for null. This is stored in the search index.
+ * The JavaScript deserializes this into FunctionSearchType.
+ *
+ * Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null`
+ * because `null` is four bytes while `0` is one byte.
+ *
+ * An input or output can be encoded as just a number if there is only one of them, AND
+ * it has no generics. The no generics rule exists to avoid ambiguity: imagine if you had
+ * a function with a single output, and that output had a single generic:
+ *
+ *     fn something() -> Result<usize, usize>
+ *
+ * If output was allowed to be any RawFunctionType, it would look like this
+ *
+ *     [[], [50, [3, 3]]]
+ *
+ * The problem is that the above output could be interpreted as either a type with ID 50 and two
+ * generics, or it could be interpreted as a pair of types, the first one with ID 50 and the second
+ * with ID 3 and a single generic parameter that is also ID 3. We avoid this ambiguity by choosing
+ * in favor of the pair of types interpretation. This is why the `(number|Array<RawFunctionType>)`
+ * is used instead of `(RawFunctionType|Array<RawFunctionType>)`.
+ *
+ * @typedef {(
+ *     0 |
+ *     [(number|Array<RawFunctionType>)] |
+ *     [(number|Array<RawFunctionType>), (number|Array<RawFunctionType>)]
+ * )}
+ */
+let RawFunctionSearchType;
+
+/**
+ * A single function input or output type. This is either a single path ID, or a pair of
+ * [path ID, generics].
+ *
+ * Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null`
+ * because `null` is four bytes while `0` is one byte.
+ *
+ * @typedef {number | [number, Array<RawFunctionType>]}
+ */
+let RawFunctionType;
+
+/**
+ * @typedef {{
+ *     inputs: Array<FunctionType>,
+ *     outputs: Array<FunctionType>,
+ * }}
+ */
+let FunctionSearchType;
+
+/**
+ * @typedef {{
+ *     name: (null|string),
+ *     ty: (null|number),
+ *     generics: Array<FunctionType>,
+ * }}
+ */
+let FunctionType;
diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js
index cb1609d4983..75c7bd45a29 100644
--- a/src/librustdoc/html/static/js/search.js
+++ b/src/librustdoc/html/static/js/search.js
@@ -114,10 +114,6 @@ function levenshtein(s1, s2) {
 function initSearch(rawSearchIndex) {
     const MAX_LEV_DISTANCE = 3;
     const MAX_RESULTS = 200;
-    const GENERICS_DATA = 2;
-    const NAME = 0;
-    const INPUTS_DATA = 0;
-    const OUTPUT_DATA = 1;
     const NO_TYPE_FILTER = -1;
     /**
      *  @type {Array<Row>}
@@ -895,21 +891,18 @@ function initSearch(rawSearchIndex) {
          * @return {integer}           - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`.
          */
         function checkGenerics(row, elem, defaultLev) {
-            if (row.length <= GENERICS_DATA || row[GENERICS_DATA].length === 0) {
-                return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
-            } else if (row[GENERICS_DATA].length > 0 && row[GENERICS_DATA][0][NAME] === "") {
-                if (row.length > GENERICS_DATA) {
-                    return checkGenerics(row[GENERICS_DATA][0], elem, defaultLev);
-                }
+            if (row.generics.length === 0) {
                 return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
+            } else if (row.generics.length > 0 && row.generics[0].name === null) {
+                return checkGenerics(row.generics[0], elem, defaultLev);
             }
             // The names match, but we need to be sure that all generics kinda
             // match as well.
             let elem_name;
-            if (elem.generics.length > 0 && row[GENERICS_DATA].length >= elem.generics.length) {
+            if (elem.generics.length > 0 && row.generics.length >= elem.generics.length) {
                 const elems = Object.create(null);
-                for (const entry of row[GENERICS_DATA]) {
-                    elem_name = entry[NAME];
+                for (const entry of row.generics) {
+                    elem_name = entry.name;
                     if (elem_name === "") {
                         // Pure generic, needs to check into it.
                         if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) {
@@ -963,7 +956,7 @@ function initSearch(rawSearchIndex) {
           */
         function checkIfInGenerics(row, elem) {
             let lev = MAX_LEV_DISTANCE + 1;
-            for (const entry of row[GENERICS_DATA]) {
+            for (const entry of row.generics) {
                 lev = Math.min(checkType(entry, elem, true), lev);
                 if (lev === 0) {
                     break;
@@ -984,23 +977,22 @@ function initSearch(rawSearchIndex) {
           *                     no match, returns `MAX_LEV_DISTANCE + 1`.
           */
         function checkType(row, elem, literalSearch) {
-            if (row[NAME].length === 0) {
+            if (row.name === null) {
                 // This is a pure "generic" search, no need to run other checks.
-                if (row.length > GENERICS_DATA) {
+                if (row.generics.length > 0) {
                     return checkIfInGenerics(row, elem);
                 }
                 return MAX_LEV_DISTANCE + 1;
             }
 
-            let lev = levenshtein(row[NAME], elem.name);
+            let lev = levenshtein(row.name, elem.name);
             if (literalSearch) {
                 if (lev !== 0) {
                     // The name didn't match, let's try to check if the generics do.
                     if (elem.generics.length === 0) {
-                        const checkGeneric = (row.length > GENERICS_DATA &&
-                            row[GENERICS_DATA].length > 0);
-                        if (checkGeneric && row[GENERICS_DATA]
-                            .findIndex(tmp_elem => tmp_elem[NAME] === elem.name) !== -1) {
+                        const checkGeneric = row.generics.length > 0;
+                        if (checkGeneric && row.generics
+                            .findIndex(tmp_elem => tmp_elem.name === elem.name) !== -1) {
                             return 0;
                         }
                     }
@@ -1009,7 +1001,7 @@ function initSearch(rawSearchIndex) {
                     return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1);
                 }
                 return 0;
-            } else if (row.length > GENERICS_DATA) {
+            } else if (row.generics.length > 0) {
                 if (elem.generics.length === 0) {
                     if (lev === 0) {
                         return 0;
@@ -1059,9 +1051,9 @@ function initSearch(rawSearchIndex) {
         function findArg(row, elem, typeFilter) {
             let lev = MAX_LEV_DISTANCE + 1;
 
-            if (row && row.type && row.type[INPUTS_DATA] && row.type[INPUTS_DATA].length > 0) {
-                for (const input of row.type[INPUTS_DATA]) {
-                    if (!typePassesFilter(typeFilter, input[1])) {
+            if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
+                for (const input of row.type.inputs) {
+                    if (!typePassesFilter(typeFilter, input.ty)) {
                         continue;
                     }
                     lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch));
@@ -1086,13 +1078,10 @@ function initSearch(rawSearchIndex) {
         function checkReturned(row, elem, typeFilter) {
             let lev = MAX_LEV_DISTANCE + 1;
 
-            if (row && row.type && row.type.length > OUTPUT_DATA) {
-                let ret = row.type[OUTPUT_DATA];
-                if (typeof ret[0] === "string") {
-                    ret = [ret];
-                }
+            if (row && row.type && row.type.output.length > 0) {
+                const ret = row.type.output;
                 for (const ret_ty of ret) {
-                    if (!typePassesFilter(typeFilter, ret_ty[1])) {
+                    if (!typePassesFilter(typeFilter, ret_ty.ty)) {
                         continue;
                     }
                     lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch));
@@ -1836,6 +1825,97 @@ function initSearch(rawSearchIndex) {
             filterCrates);
     }
 
+    /**
+     * Convert a list of RawFunctionType / ID to object-based FunctionType.
+     *
+     * Crates often have lots of functions in them, and it's common to have a large number of
+     * functions that operate on a small set of data types, so the search index compresses them
+     * by encoding function parameter and return types as indexes into an array of names.
+     *
+     * Even when a general-purpose compression algorithm is used, this is still a win. I checked.
+     * https://github.com/rust-lang/rust/pull/98475#issue-1284395985
+     *
+     * The format for individual function types is encoded in
+     * librustdoc/html/render/mod.rs: impl Serialize for RenderType
+     *
+     * @param {null|Array<RawFunctionType>} types
+     * @param {Array<{name: string, ty: number}>} lowercasePaths
+     *
+     * @return {Array<FunctionSearchType>}
+     */
+    function buildItemSearchTypeAll(types, lowercasePaths) {
+        const PATH_INDEX_DATA = 0;
+        const GENERICS_DATA = 1;
+        return types.map(type => {
+            let pathIndex, generics;
+            if (typeof type === "number") {
+                pathIndex = type;
+                generics = [];
+            } else {
+                pathIndex = type[PATH_INDEX_DATA];
+                generics = buildItemSearchTypeAll(type[GENERICS_DATA], lowercasePaths);
+            }
+            return {
+                // `0` is used as a sentinel because it's fewer bytes than `null`
+                name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
+                ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
+                generics: generics,
+            };
+        });
+    }
+
+    /**
+     * Convert from RawFunctionSearchType to FunctionSearchType.
+     *
+     * Crates often have lots of functions in them, and function signatures are sometimes complex,
+     * so rustdoc uses a pretty tight encoding for them. This function converts it to a simpler,
+     * object-based encoding so that the actual search code is more readable and easier to debug.
+     *
+     * The raw function search type format is generated using serde in
+     * librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType
+     *
+     * @param {RawFunctionSearchType} functionSearchType
+     * @param {Array<{name: string, ty: number}>} lowercasePaths
+     *
+     * @return {null|FunctionSearchType}
+     */
+    function buildFunctionSearchType(functionSearchType, lowercasePaths) {
+        const INPUTS_DATA = 0;
+        const OUTPUT_DATA = 1;
+        // `0` is used as a sentinel because it's fewer bytes than `null`
+        if (functionSearchType === 0) {
+            return null;
+        }
+        let inputs, output;
+        if (typeof functionSearchType[INPUTS_DATA] === "number") {
+            const pathIndex = functionSearchType[INPUTS_DATA];
+            inputs = [{
+                name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
+                ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
+                generics: [],
+            }];
+        } else {
+            inputs = buildItemSearchTypeAll(functionSearchType[INPUTS_DATA], lowercasePaths);
+        }
+        if (functionSearchType.length > 1) {
+            if (typeof functionSearchType[OUTPUT_DATA] === "number") {
+                const pathIndex = functionSearchType[OUTPUT_DATA];
+                output = [{
+                    name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
+                    ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
+                    generics: [],
+                }];
+            } else {
+                output = buildItemSearchTypeAll(functionSearchType[OUTPUT_DATA], lowercasePaths);
+            }
+        } else {
+            output = [];
+        }
+        return {
+            inputs, output,
+        };
+    }
+
     function buildIndex(rawSearchIndex) {
         searchIndex = [];
         /**
@@ -1862,14 +1942,22 @@ function initSearch(rawSearchIndex) {
              * q[i] contains the full path of the item, or an empty string indicating
              * "same as q[i-1]".
              *
-             * i[i], f[i] are a mystery.
+             * i[i] contains an item's parent, usually a module. For compactness,
+             * it is a set of indexes into the `p` array.
+             *
+             * f[i] contains function signatures, or `0` if the item isn't a function.
+             * Functions are themselves encoded as arrays. The first item is a list of
+             * types representing the function's inputs, and the second list item is a list
+             * of types representing the function's output. Tuples are flattened.
+             * Types are also represented as arrays; the first item is an index into the `p`
+             * array, while the second is a list of types representing any generic parameters.
              *
              * `a` defines aliases with an Array of pairs: [name, offset], where `offset`
              * points into the n/t/d/q/i/f arrays.
              *
              * `doc` contains the description of the crate.
              *
-             * `p` is a mystery and isn't the same length as n/t/d/q/i/f.
+             * `p` is a list of path/type pairs. It is used for parents and function parameters.
              *
              * @type {{
              *   doc: string,
@@ -1879,7 +1967,7 @@ function initSearch(rawSearchIndex) {
              *   d: Array<string>,
              *   q: Array<string>,
              *   i: Array<Number>,
-             *   f: Array<Array<?>>,
+             *   f: Array<RawFunctionSearchType>,
              *   p: Array<Object>,
              * }}
              */
@@ -1923,9 +2011,14 @@ function initSearch(rawSearchIndex) {
             //             [Number] index to items]
             const aliases = crateCorpus.a;
 
+            // an array of [{name: String, ty: Number}]
+            const lowercasePaths = [];
+
             // convert `rawPaths` entries into object form
+            // generate normalizedPaths for function search mode
             let len = paths.length;
             for (i = 0; i < len; ++i) {
+                lowercasePaths.push({ty: paths[i][0], name: paths[i][1].toLowerCase()});
                 paths[i] = {ty: paths[i][0], name: paths[i][1]};
             }
 
@@ -1955,7 +2048,7 @@ function initSearch(rawSearchIndex) {
                     path: itemPaths[i] ? itemPaths[i] : lastPath,
                     desc: itemDescs[i],
                     parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
-                    type: itemFunctionSearchTypes[i],
+                    type: buildFunctionSearchType(itemFunctionSearchTypes[i], lowercasePaths),
                     id: id,
                     normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""),
                 };
diff --git a/src/librustdoc/json/conversions.rs b/src/librustdoc/json/conversions.rs
index 54dc1b4fdee..afc84cc0a97 100644
--- a/src/librustdoc/json/conversions.rs
+++ b/src/librustdoc/json/conversions.rs
@@ -761,7 +761,6 @@ impl FromWithTcx<ItemType> for ItemKind {
             TraitAlias => ItemKind::TraitAlias,
             ProcAttribute => ItemKind::ProcAttribute,
             ProcDerive => ItemKind::ProcDerive,
-            Generic => unreachable!(),
         }
     }
 }
author	bors <bors@rust-lang.org>	2022-06-28 21:40:10 +0000
committer	bors <bors@rust-lang.org>	2022-06-28 21:40:10 +0000
commit	2953edc7b7a00d14c4ba940ebb46b4e7148a9d71 (patch)
tree	cf271f0749d00431e79bb72c88d7e20127d7964d
parent	830880640304ba8699c5f9a0c4665c38a3271963 (diff)
parent	33cf9ea4a2aebb015e81071968659bd51218c5af (diff)
download	rust-2953edc7b7a00d14c4ba940ebb46b4e7148a9d71.tar.gz rust-2953edc7b7a00d14c4ba940ebb46b4e7148a9d71.zip