about summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2024-11-13 10:46:27 -0700
committerMichael Howell <michael@notriddle.com>2024-11-13 12:04:46 -0700
commit86da4be47f3a0d52b894535a227ee3f2515b9af3 (patch)
treed4012245776a14449b6b6caa88b7307e95dc1eb6 /tests
parent242f20dc1e95ca8def0ff436d7c844811ae7ac25 (diff)
downloadrust-86da4be47f3a0d52b894535a227ee3f2515b9af3.tar.gz
rust-86da4be47f3a0d52b894535a227ee3f2515b9af3.zip
rustdoc: use a trie for name-based search
Preview and profiler results
----------------------------

Here's some quick profiling in Firefox done on the rust compiler docs:

- Before: https://share.firefox.dev/3UPm3M8
- After: https://share.firefox.dev/40LXvYb

Here's the results for the node.js profiler:

- https://notriddle.com/rustdoc-html-demo-15/trie-perf/index.html

Here's a copy that you can use to try it out. Compare it with [the nightly].
Try typing `typecheckercontext` one character at a time, slowly.

- https://notriddle.com/rustdoc-html-demo-15/compiler-doc-trie/index.html

[the nightly]: https://doc.rust-lang.org/nightly/nightly-rustc/

The fuzzy match algo is based on [Fast String Correction with
Levenshtein-Automata] and the corresponding implementation code in [moman]
and [Lucene]; the bit-packing representation comes from Lucene, but the
actual matcher is more based on `fsc.py`. As suggested in the paper, a
trie is used to represent the FSA dictionary.

The same trie is used for prefix matching. Substring matching is done with a
side table of three-character[^1] windows that point into the trie.

[Fast String Correction with Levenshtein-Automata]: https://github.com/tpn/pdfs/blob/master/Fast%20String%20Correction%20with%20Levenshtein-Automata%20(2002)%20(10.1.1.16.652).pdf
[Lucene]: https://fossies.org/linux/lucene/lucene/core/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java
[moman]: https://gitlab.com/notriddle/moman-rustdoc

User-visible changes
--------------------

I don't expect anybody to notice anything, but it does cause two changes:

- Substring matches, in the middle of a name, only apply if there's three
  or more characters in the search query.
- Levenshtein distance limit now maxes out at two. In the old version,
  the limit was w/3, so you could get looser matches for queries with
  9 or more characters[^1] in them.

[^1]: technically utf-16 code units
Diffstat (limited to 'tests')
-rw-r--r--tests/rustdoc-js-std/deduplication.js1
-rw-r--r--tests/rustdoc-js-std/path-maxeditdistance.js12
-rw-r--r--tests/rustdoc-js/non-english-identifier.js26
-rw-r--r--tests/rustdoc-js/path-maxeditdistance.js25
-rw-r--r--tests/rustdoc-js/prototype.js4
5 files changed, 45 insertions, 23 deletions
diff --git a/tests/rustdoc-js-std/deduplication.js b/tests/rustdoc-js-std/deduplication.js
index 51279dd5ed4..95049d0a174 100644
--- a/tests/rustdoc-js-std/deduplication.js
+++ b/tests/rustdoc-js-std/deduplication.js
@@ -5,6 +5,5 @@ const EXPECTED = {
     'others': [
         { 'path': 'std::f32', 'name': 'is_nan' },
         { 'path': 'std::f64', 'name': 'is_nan' },
-        { 'path': 'std::option::Option', 'name': 'is_none' },
     ],
 };
diff --git a/tests/rustdoc-js-std/path-maxeditdistance.js b/tests/rustdoc-js-std/path-maxeditdistance.js
index 632df658f75..af71713f055 100644
--- a/tests/rustdoc-js-std/path-maxeditdistance.js
+++ b/tests/rustdoc-js-std/path-maxeditdistance.js
@@ -3,16 +3,8 @@ const FILTER_CRATE = "std";
 const EXPECTED = [
     {
         query: 'vec::intoiterator',
-        others: [
-            // trait std::iter::IntoIterator is not the first result
-            { 'path': 'std::vec', 'name': 'IntoIter' },
-            { 'path': 'std::vec::Vec', 'name': 'into_iter' },
-            { 'path': 'std::vec::Drain', 'name': 'into_iter' },
-            { 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
-            { 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
-            { 'path': 'std::vec::Splice', 'name': 'into_iter' },
-            { 'path': 'std::collections::vec_deque::VecDeque', 'name': 'into_iter' },
-        ],
+        // trait std::iter::IntoIterator is not the first result
+        others: [],
     },
     {
         query: 'vec::iter',
diff --git a/tests/rustdoc-js/non-english-identifier.js b/tests/rustdoc-js/non-english-identifier.js
index 6a2c4cc637c..f2180b4c755 100644
--- a/tests/rustdoc-js/non-english-identifier.js
+++ b/tests/rustdoc-js/non-english-identifier.js
@@ -133,22 +133,34 @@ const EXPECTED = [
                 path: "non_english_identifier",
                 href: "../non_english_identifier/trait.加法.html",
                 desc: "Add"
-            },
+            }],
+        in_args: [{
+            name: "加上",
+            path: "non_english_identifier::加法",
+            href: "../non_english_identifier/trait.加法.html#tymethod.加上",
+        }],
+        returned: [],
+    },
+    { // levensthein and substring checking only kick in at three characters
+        query: '加法宏',
+        others: [
             {
                 name: "中文名称的加法宏",
                 path: "non_english_identifier",
                 href: "../non_english_identifier/macro.中文名称的加法宏.html",
-            },
+            }],
+        in_args: [],
+        returned: [],
+    },
+    { // levensthein and substring checking only kick in at three characters
+        query: '加法A',
+        others: [
             {
                 name: "中文名称的加法API",
                 path: "non_english_identifier",
                 href: "../non_english_identifier/fn.中文名称的加法API.html",
             }],
-        in_args: [{
-            name: "加上",
-            path: "non_english_identifier::加法",
-            href: "../non_english_identifier/trait.加法.html#tymethod.加上",
-        }],
+        in_args: [],
         returned: [],
     },
     { // Extensive type-based search is still buggy, experimental & work-in-progress.
diff --git a/tests/rustdoc-js/path-maxeditdistance.js b/tests/rustdoc-js/path-maxeditdistance.js
index 73b24a6dddf..cf700193ac4 100644
--- a/tests/rustdoc-js/path-maxeditdistance.js
+++ b/tests/rustdoc-js/path-maxeditdistance.js
@@ -14,21 +14,38 @@ const EXPECTED = [
         ],
     },
     {
-        // swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3)
+        // swap br/rb; that's edit distance 1, where maxPathEditDistance = 2
         'query': 'arbacadarba::hocuspocusprestidigitation',
         'others': [
             { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
         ],
     },
     {
-        // truncate 5 chars, where maxEditDistance = 7 (21 / 3)
-        'query': 'abracadarba::hocusprestidigitation',
+        // swap p/o o/p, that's also edit distance 1
+        'query': 'abracadabra::hocusopcusprestidigitation',
         'others': [
             { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
         ],
     },
     {
-        // truncate 9 chars, where maxEditDistance = 5 (17 / 3)
+        // swap p/o o/p and gi/ig, that's edit distance 2
+        'query': 'abracadabra::hocusopcusprestidiigtation',
+        'others': [
+            { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
+        ],
+    },
+    {
+        // swap p/o o/p, gi/ig, and ti/it, that's edit distance 3 and not shown (we stop at 2)
+        'query': 'abracadabra::hocusopcusprestidiigtaiton',
+        'others': [],
+    },
+    {
+        // truncate 5 chars, where maxEditDistance = 2
+        'query': 'abracadarba::hocusprestidigitation',
+        'others': [],
+    },
+    {
+        // truncate 9 chars, where maxEditDistance = 2
         'query': 'abracadarba::hprestidigitation',
         'others': [],
     },
diff --git a/tests/rustdoc-js/prototype.js b/tests/rustdoc-js/prototype.js
index da72fdce3db..0862acd8ffa 100644
--- a/tests/rustdoc-js/prototype.js
+++ b/tests/rustdoc-js/prototype.js
@@ -9,7 +9,9 @@ const EXPECTED = [
     },
     {
         'query': '__proto__',
-        'others': [],
+        'others': [
+            {"path": "", "name": "prototype"},
+        ],
         'returned': [],
         'in_args': [],
     },