about summary refs log tree commit diff
path: root/src/librustdoc/html/render
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2023-12-30 20:56:59 -0700
committerMichael Howell <michael@notriddle.com>2023-12-31 01:03:35 -0700
commit86b9550811b359a0af7a93523fbd14c5cdcc635f (patch)
tree75686b0d1bb31962acb6b5f381478b903b7eb523 /src/librustdoc/html/render
parent1ab60f2a646e119db42a192771a4cdbc294fd5ff (diff)
downloadrust-86b9550811b359a0af7a93523fbd14c5cdcc635f.tar.gz
rust-86b9550811b359a0af7a93523fbd14c5cdcc635f.zip
rustdoc-search: tighter encoding for `f` index
Two optimizations for the function signature search:

* Instead of using JSON arrays, like `[1,20]`, it uses VLQ
  hex with no commas, like `[aAd]`.
* This also adds backrefs: if you have more than one function
  with exactly the same signature, it'll not only store it once,
  it'll *decode* it once, and store in the typeIdMap only once.

Size change
-----------

standard library

```console
$ du -bs search-index-old.js search-index-new.js
4976370 search-index-old.js
4404391 search-index-new.js
```

((4976370-4404391)/4404391)*100% = 12.9%

Benchmarks are similarly shrunk:

```console
$ du -hs tmp/{arti,cortex-m,sqlx,stm32f4,ripgrep}/toolchain_{old,new}/doc/search-index.js
10555067        tmp/arti/toolchain_old/doc/search-index.js
8921236 tmp/arti/toolchain_new/doc/search-index.js
77018   tmp/cortex-m/toolchain_old/doc/search-index.js
66676   tmp/cortex-m/toolchain_new/doc/search-index.js
2876330 tmp/sqlx/toolchain_old/doc/search-index.js
2436812 tmp/sqlx/toolchain_new/doc/search-index.js
63632890        tmp/stm32f4/toolchain_old/doc/search-index.js
52337438        tmp/stm32f4/toolchain_new/doc/search-index.js
631150  tmp/ripgrep/toolchain_old/doc/search-index.js
541646  tmp/ripgrep/toolchain_new/doc/search-index.js
```
Diffstat (limited to 'src/librustdoc/html/render')
-rw-r--r--src/librustdoc/html/render/mod.rs146
-rw-r--r--src/librustdoc/html/render/search_index.rs29
2 files changed, 104 insertions, 71 deletions
diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs
index 118c6eeb289..345a47a21b8 100644
--- a/src/librustdoc/html/render/mod.rs
+++ b/src/librustdoc/html/render/mod.rs
@@ -58,7 +58,7 @@ use rustc_span::{
     symbol::{sym, Symbol},
     BytePos, FileName, RealFileName,
 };
-use serde::ser::{SerializeMap, SerializeSeq};
+use serde::ser::SerializeMap;
 use serde::{Serialize, Serializer};
 
 use crate::clean::{self, ItemId, RenderedLink, SelfTy};
@@ -123,44 +123,53 @@ pub(crate) struct IndexItem {
 }
 
 /// A type used for the search index.
-#[derive(Debug)]
+#[derive(Debug, Eq, PartialEq)]
 pub(crate) struct RenderType {
     id: Option<RenderTypeId>,
     generics: Option<Vec<RenderType>>,
     bindings: Option<Vec<(RenderTypeId, Vec<RenderType>)>>,
 }
 
-impl Serialize for RenderType {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        let id = match &self.id {
-            // 0 is a sentinel, everything else is one-indexed
-            None => 0,
-            // concrete type
-            Some(RenderTypeId::Index(idx)) if *idx >= 0 => idx + 1,
-            // generic type parameter
-            Some(RenderTypeId::Index(idx)) => *idx,
-            _ => panic!("must convert render types to indexes before serializing"),
-        };
+impl RenderType {
+    pub fn write_to_string(&self, string: &mut String) {
         if self.generics.is_some() || self.bindings.is_some() {
-            let mut seq = serializer.serialize_seq(None)?;
-            seq.serialize_element(&id)?;
-            seq.serialize_element(self.generics.as_ref().map(Vec::as_slice).unwrap_or_default())?;
+            string.push('{');
+            // 0 is a sentinel, everything else is one-indexed
+            match self.id {
+                Some(id) => id.write_to_string(string),
+                None => string.push('`'),
+            }
+            string.push('{');
+            for generic in &self.generics.as_ref().map(Vec::as_slice).unwrap_or_default()[..] {
+                generic.write_to_string(string);
+            }
+            string.push('}');
             if self.bindings.is_some() {
-                seq.serialize_element(
-                    self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default(),
-                )?;
+                string.push('{');
+                for binding in &self.bindings.as_ref().map(Vec::as_slice).unwrap_or_default()[..] {
+                    string.push('{');
+                    binding.0.write_to_string(string);
+                    string.push('{');
+                    for constraint in &binding.1[..] {
+                        constraint.write_to_string(string);
+                    }
+                    string.push('}');
+                    string.push('}');
+                }
+                string.push('}');
             }
-            seq.end()
+            string.push('}');
         } else {
-            id.serialize(serializer)
+            // 0 is a sentinel, everything else is one-indexed
+            match self.id {
+                Some(id) => id.write_to_string(string),
+                None => string.push('`'),
+            }
         }
     }
 }
 
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub(crate) enum RenderTypeId {
     DefId(DefId),
     Primitive(clean::PrimitiveType),
@@ -168,36 +177,50 @@ pub(crate) enum RenderTypeId {
     Index(isize),
 }
 
-impl Serialize for RenderTypeId {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        let id = match &self {
+impl RenderTypeId {
+    pub fn write_to_string(&self, string: &mut String) {
+        // (sign, value)
+        let (sign, id): (bool, u32) = match &self {
             // 0 is a sentinel, everything else is one-indexed
             // concrete type
-            RenderTypeId::Index(idx) if *idx >= 0 => idx + 1,
+            RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()),
             // generic type parameter
-            RenderTypeId::Index(idx) => *idx,
+            RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()),
             _ => panic!("must convert render types to indexes before serializing"),
         };
-        id.serialize(serializer)
+        // zig-zag notation
+        let value: u32 = (id << 1) | (if sign { 1 } else { 0 });
+        // encode
+        let mut shift: u32 = 28;
+        let mut mask: u32 = 0xF0_00_00_00;
+        while shift < 32 {
+            let hexit = (value & mask) >> shift;
+            if hexit != 0 || shift == 0 {
+                let hex =
+                    char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
+                string.push(hex);
+            }
+            shift = shift.wrapping_sub(4);
+            mask = mask >> 4;
+        }
     }
 }
 
 /// Full type of functions/methods in the search index.
-#[derive(Debug)]
+#[derive(Debug, Eq, PartialEq)]
 pub(crate) struct IndexItemFunctionType {
     inputs: Vec<RenderType>,
     output: Vec<RenderType>,
     where_clause: Vec<Vec<RenderType>>,
 }
 
-impl Serialize for IndexItemFunctionType {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
+impl IndexItemFunctionType {
+    pub fn write_to_string<'a>(
+        &'a self,
+        string: &mut String,
+        backref_queue: &mut VecDeque<&'a IndexItemFunctionType>,
+    ) {
+        assert!(backref_queue.len() < 16);
         // If we couldn't figure out a type, just write `0`.
         let has_missing = self
             .inputs
@@ -205,33 +228,58 @@ impl Serialize for IndexItemFunctionType {
             .chain(self.output.iter())
             .any(|i| i.id.is_none() && i.generics.is_none());
         if has_missing {
-            0.serialize(serializer)
+            string.push('`');
+        } else if let Some(idx) = backref_queue.iter().position(|other| *other == self) {
+            string.push(
+                char::try_from('0' as u32 + u32::try_from(idx).unwrap())
+                    .expect("last possible value is '?'"),
+            );
         } else {
-            let mut seq = serializer.serialize_seq(None)?;
+            backref_queue.push_front(self);
+            if backref_queue.len() >= 16 {
+                backref_queue.pop_back();
+            }
+            string.push('{');
             match &self.inputs[..] {
                 [one] if one.generics.is_none() && one.bindings.is_none() => {
-                    seq.serialize_element(one)?
+                    one.write_to_string(string);
+                }
+                _ => {
+                    string.push('{');
+                    for item in &self.inputs[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                 }
-                _ => seq.serialize_element(&self.inputs)?,
             }
             match &self.output[..] {
                 [] if self.where_clause.is_empty() => {}
                 [one] if one.generics.is_none() && one.bindings.is_none() => {
-                    seq.serialize_element(one)?
+                    one.write_to_string(string);
+                }
+                _ => {
+                    string.push('{');
+                    for item in &self.output[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                 }
-                _ => seq.serialize_element(&self.output)?,
             }
             for constraint in &self.where_clause {
                 if let [one] = &constraint[..]
                     && one.generics.is_none()
                     && one.bindings.is_none()
                 {
-                    seq.serialize_element(one)?;
+                    one.write_to_string(string);
                 } else {
-                    seq.serialize_element(constraint)?;
+                    string.push('{');
+                    for item in &constraint[..] {
+                        item.write_to_string(string);
+                    }
+                    string.push('}');
                 }
             }
-            seq.end()
+            string.push('}');
         }
     }
 }
diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs
index a1029320d2d..e49df400c83 100644
--- a/src/librustdoc/html/render/search_index.rs
+++ b/src/librustdoc/html/render/search_index.rs
@@ -1,5 +1,5 @@
 use std::collections::hash_map::Entry;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, VecDeque};
 
 use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
 use rustc_middle::ty::TyCtxt;
@@ -409,9 +409,11 @@ pub(crate) fn build_index<'tcx>(
             let mut full_paths = Vec::with_capacity(self.items.len());
             let mut descriptions = Vec::with_capacity(self.items.len());
             let mut parents = Vec::with_capacity(self.items.len());
-            let mut functions = Vec::with_capacity(self.items.len());
+            let mut functions = String::with_capacity(self.items.len());
             let mut deprecated = Vec::with_capacity(self.items.len());
 
+            let mut backref_queue = VecDeque::new();
+
             for (index, item) in self.items.iter().enumerate() {
                 let n = item.ty as u8;
                 let c = char::try_from(n + b'A').expect("item types must fit in ASCII");
@@ -434,27 +436,10 @@ pub(crate) fn build_index<'tcx>(
                     full_paths.push((index, &item.path));
                 }
 
-                // Fake option to get `0` out as a sentinel instead of `null`.
-                // We want to use `0` because it's three less bytes.
-                enum FunctionOption<'a> {
-                    Function(&'a IndexItemFunctionType),
-                    None,
-                }
-                impl<'a> Serialize for FunctionOption<'a> {
-                    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-                    where
-                        S: Serializer,
-                    {
-                        match self {
-                            FunctionOption::None => 0.serialize(serializer),
-                            FunctionOption::Function(ty) => ty.serialize(serializer),
-                        }
-                    }
+                match &item.search_type {
+                    Some(ty) => ty.write_to_string(&mut functions, &mut backref_queue),
+                    None => functions.push('`'),
                 }
-                functions.push(match &item.search_type {
-                    Some(ty) => FunctionOption::Function(ty),
-                    None => FunctionOption::None,
-                });
 
                 if item.deprecation.is_some() {
                     deprecated.push(index);