From 5bec161ce12501695356492129ea8fc952a30f85 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Wed, 24 Sep 2025 10:34:52 -0700 Subject: rustdoc-search: stringdex update with more packing Before: 18M build/x86_64-unknown-linux-gnu/doc/search.index/ 57M build/x86_64-unknown-linux-gnu/compiler-doc/search.index/ After: 16M build/x86_64-unknown-linux-gnu/doc/search.index/ 49M build/x86_64-unknown-linux-gnu/compiler-doc/search.index/ --- Cargo.lock | 4 +- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/static/js/stringdex.js | 78 +++++++++++++++++++----------- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2e635b4cfe..7c2eb559b21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5233,9 +5233,9 @@ dependencies = [ [[package]] name = "stringdex" -version = "0.0.1-alpha9" +version = "0.0.1-alpha10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7081029913fd7d591c0112182aba8c98ae886b4f12edb208130496cd17dc3c15" +checksum = "0fa846a7d509d1828a4f90962dc09810e161abcada7fc6a921e92c168d0811d7" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index f37a8d85361..0d77fe64e30 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = { version = "0.0.1-alpha9" } +stringdex = { version = "0.0.1-alpha10" } tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/static/js/stringdex.js b/src/librustdoc/html/static/js/stringdex.js index 5bdc81e330e..6299576d564 100644 --- a/src/librustdoc/html/static/js/stringdex.js +++ b/src/librustdoc/html/static/js/stringdex.js @@ -1108,22 +1108,39 @@ function loadDatabase(hooks) { const id2 = id1 + ((nodeid[4] << 8) | nodeid[5]); leaves = RoaringBitmap.makeSingleton(id1) .union(RoaringBitmap.makeSingleton(id2)); + } else if (!isWhole && (nodeid[0] & 0xf0) === 0x80) { + const id1 = ((nodeid[0] & 0x0f) << 16) | (nodeid[1] << 8) | nodeid[2]; + const id2 = id1 + ((nodeid[3] << 4) | ((nodeid[4] >> 4) & 0x0f)); + const id3 = id2 + (((nodeid[4] & 0x0f) << 8) | nodeid[5]); + leaves = RoaringBitmap.makeSingleton(id1) + .union(RoaringBitmap.makeSingleton(id2)) + .union(RoaringBitmap.makeSingleton(id3)); } else { leaves = RoaringBitmap.makeSingleton( (nodeid[2] << 24) | (nodeid[3] << 16) | (nodeid[4] << 8) | nodeid[5], ); } - const data = (nodeid[0] & 0x20) !== 0 ? - Uint8Array.of(((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)) : - EMPTY_UINT8; - newPromise = Promise.resolve(new PrefixSearchTree( - EMPTY_SEARCH_TREE_BRANCHES, - EMPTY_SEARCH_TREE_BRANCHES, - data, - isWhole ? leaves : EMPTY_BITMAP, - isWhole ? EMPTY_BITMAP : leaves, - )); + if (isWhole) { + const data = (nodeid[0] & 0x20) !== 0 ? + Uint8Array.of(((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)) : + EMPTY_UINT8; + newPromise = Promise.resolve(new PrefixSearchTree( + EMPTY_SEARCH_TREE_BRANCHES, + EMPTY_SEARCH_TREE_BRANCHES, + data, + leaves, + EMPTY_BITMAP, + )); + } else { + const data = (nodeid[0] & 0xf0) === 0x80 ? 0 : ( + ((nodeid[0] & 0x0f) << 4) | (nodeid[1] >> 4)); + newPromise = Promise.resolve(new SuffixSearchTree( + EMPTY_SEARCH_TREE_BRANCHES, + data, + leaves, + )); + } } else { const hashHex = makeHexFromUint8Array(nodeid); newPromise = new Promise((resolve, reject) => { @@ -2748,6 +2765,7 @@ function loadDatabase(hooks) { // because that's the canonical, hashed version of the data let compression_tag = input[i]; const is_pure_suffixes_only_node = (compression_tag & 0x01) !== 0; + let no_leaves_flag; if (compression_tag > 1) { // compressed node const is_long_compressed = (compression_tag & 0x04) !== 0; @@ -2759,7 +2777,8 @@ function loadDatabase(hooks) { compression_tag |= input[i] << 16; i += 1; } - let dlen = input[i]; + let dlen = input[i] & 0x7F; + no_leaves_flag = input[i] & 0x80; i += 1; if (is_data_compressed) { data = data_history[data_history.length - dlen - 1]; @@ -2786,10 +2805,15 @@ function loadDatabase(hooks) { let whole; let suffix; if (is_pure_suffixes_only_node) { - suffix = input[i] === 0 ? - EMPTY_BITMAP1 : - new RoaringBitmap(input, i); - i += suffix.consumed_len_bytes; + if (no_leaves_flag) { + whole = EMPTY_BITMAP; + suffix = EMPTY_BITMAP; + } else { + suffix = input[i] === 0 ? + EMPTY_BITMAP1 : + new RoaringBitmap(input, i); + i += suffix.consumed_len_bytes; + } tree = new SuffixSearchTree( branches, dlen, @@ -2807,7 +2831,7 @@ function loadDatabase(hooks) { let ci = 0; canonical[ci] = 1; ci += 1; - canonical[ci] = dlen; + canonical[ci] = dlen | no_leaves_flag; ci += 1; canonical[ci] = input[coffset]; // suffix child count ci += 1; @@ -2821,10 +2845,9 @@ function loadDatabase(hooks) { } siphashOfBytes(canonical.subarray(0, clen), 0, 0, 0, 0, hash); } else { - if (input[i] === 0xff) { + if (no_leaves_flag) { whole = EMPTY_BITMAP; - suffix = EMPTY_BITMAP1; - i += 1; + suffix = EMPTY_BITMAP; } else { whole = input[i] === 0 ? EMPTY_BITMAP1 : @@ -2856,7 +2879,7 @@ function loadDatabase(hooks) { let ci = 0; canonical[ci] = 0; ci += 1; - canonical[ci] = dlen; + canonical[ci] = dlen | no_leaves_flag; ci += 1; canonical.set(data, ci); ci += data.length; @@ -2880,9 +2903,11 @@ function loadDatabase(hooks) { } hash[2] &= 0x7f; } else { + i += 1; // uncompressed node - const dlen = input [i + 1]; - i += 2; + const dlen = input[i] & 0x7F; + no_leaves_flag = input[i] & 0x80; + i += 1; if (dlen === 0 || is_pure_suffixes_only_node) { data = EMPTY_UINT8; } else { @@ -2897,16 +2922,15 @@ function loadDatabase(hooks) { i += branches_consumed_len_bytes; let whole; let suffix; - if (is_pure_suffixes_only_node) { + if (no_leaves_flag) { + whole = EMPTY_BITMAP; + suffix = EMPTY_BITMAP; + } else if (is_pure_suffixes_only_node) { whole = EMPTY_BITMAP; suffix = input[i] === 0 ? EMPTY_BITMAP1 : new RoaringBitmap(input, i); i += suffix.consumed_len_bytes; - } else if (input[i] === 0xff) { - whole = EMPTY_BITMAP; - suffix = EMPTY_BITMAP; - i += 1; } else { whole = input[i] === 0 ? EMPTY_BITMAP1 : -- cgit 1.4.1-3-g733a5