diff options
| author | Michael Howell <michael@notriddle.com> | 2024-11-13 10:46:27 -0700 |
|---|---|---|
| committer | Michael Howell <michael@notriddle.com> | 2024-11-13 12:04:46 -0700 |
| commit | 86da4be47f3a0d52b894535a227ee3f2515b9af3 (patch) | |
| tree | d4012245776a14449b6b6caa88b7307e95dc1eb6 /tests | |
| parent | 242f20dc1e95ca8def0ff436d7c844811ae7ac25 (diff) | |
| download | rust-86da4be47f3a0d52b894535a227ee3f2515b9af3.tar.gz rust-86da4be47f3a0d52b894535a227ee3f2515b9af3.zip | |
rustdoc: use a trie for name-based search
Preview and profiler results ---------------------------- Here's some quick profiling in Firefox done on the rust compiler docs: - Before: https://share.firefox.dev/3UPm3M8 - After: https://share.firefox.dev/40LXvYb Here's the results for the node.js profiler: - https://notriddle.com/rustdoc-html-demo-15/trie-perf/index.html Here's a copy that you can use to try it out. Compare it with [the nightly]. Try typing `typecheckercontext` one character at a time, slowly. - https://notriddle.com/rustdoc-html-demo-15/compiler-doc-trie/index.html [the nightly]: https://doc.rust-lang.org/nightly/nightly-rustc/ The fuzzy match algo is based on [Fast String Correction with Levenshtein-Automata] and the corresponding implementation code in [moman] and [Lucene]; the bit-packing representation comes from Lucene, but the actual matcher is more based on `fsc.py`. As suggested in the paper, a trie is used to represent the FSA dictionary. The same trie is used for prefix matching. Substring matching is done with a side table of three-character[^1] windows that point into the trie. [Fast String Correction with Levenshtein-Automata]: https://github.com/tpn/pdfs/blob/master/Fast%20String%20Correction%20with%20Levenshtein-Automata%20(2002)%20(10.1.1.16.652).pdf [Lucene]: https://fossies.org/linux/lucene/lucene/core/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java [moman]: https://gitlab.com/notriddle/moman-rustdoc User-visible changes -------------------- I don't expect anybody to notice anything, but it does cause two changes: - Substring matches, in the middle of a name, only apply if there's three or more characters in the search query. - Levenshtein distance limit now maxes out at two. In the old version, the limit was w/3, so you could get looser matches for queries with 9 or more characters[^1] in them. [^1]: technically utf-16 code units
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/rustdoc-js-std/deduplication.js | 1 | ||||
| -rw-r--r-- | tests/rustdoc-js-std/path-maxeditdistance.js | 12 | ||||
| -rw-r--r-- | tests/rustdoc-js/non-english-identifier.js | 26 | ||||
| -rw-r--r-- | tests/rustdoc-js/path-maxeditdistance.js | 25 | ||||
| -rw-r--r-- | tests/rustdoc-js/prototype.js | 4 |
5 files changed, 45 insertions, 23 deletions
diff --git a/tests/rustdoc-js-std/deduplication.js b/tests/rustdoc-js-std/deduplication.js index 51279dd5ed4..95049d0a174 100644 --- a/tests/rustdoc-js-std/deduplication.js +++ b/tests/rustdoc-js-std/deduplication.js @@ -5,6 +5,5 @@ const EXPECTED = { 'others': [ { 'path': 'std::f32', 'name': 'is_nan' }, { 'path': 'std::f64', 'name': 'is_nan' }, - { 'path': 'std::option::Option', 'name': 'is_none' }, ], }; diff --git a/tests/rustdoc-js-std/path-maxeditdistance.js b/tests/rustdoc-js-std/path-maxeditdistance.js index 632df658f75..af71713f055 100644 --- a/tests/rustdoc-js-std/path-maxeditdistance.js +++ b/tests/rustdoc-js-std/path-maxeditdistance.js @@ -3,16 +3,8 @@ const FILTER_CRATE = "std"; const EXPECTED = [ { query: 'vec::intoiterator', - others: [ - // trait std::iter::IntoIterator is not the first result - { 'path': 'std::vec', 'name': 'IntoIter' }, - { 'path': 'std::vec::Vec', 'name': 'into_iter' }, - { 'path': 'std::vec::Drain', 'name': 'into_iter' }, - { 'path': 'std::vec::IntoIter', 'name': 'into_iter' }, - { 'path': 'std::vec::ExtractIf', 'name': 'into_iter' }, - { 'path': 'std::vec::Splice', 'name': 'into_iter' }, - { 'path': 'std::collections::vec_deque::VecDeque', 'name': 'into_iter' }, - ], + // trait std::iter::IntoIterator is not the first result + others: [], }, { query: 'vec::iter', diff --git a/tests/rustdoc-js/non-english-identifier.js b/tests/rustdoc-js/non-english-identifier.js index 6a2c4cc637c..f2180b4c755 100644 --- a/tests/rustdoc-js/non-english-identifier.js +++ b/tests/rustdoc-js/non-english-identifier.js @@ -133,22 +133,34 @@ const EXPECTED = [ path: "non_english_identifier", href: "../non_english_identifier/trait.加法.html", desc: "Add" - }, + }], + in_args: [{ + name: "加上", + path: "non_english_identifier::加法", + href: "../non_english_identifier/trait.加法.html#tymethod.加上", + }], + returned: [], + }, + { // levensthein and substring checking only kick in at three characters + query: '加法宏', + others: [ { name: "中文名称的加法宏", path: "non_english_identifier", href: "../non_english_identifier/macro.中文名称的加法宏.html", - }, + }], + in_args: [], + returned: [], + }, + { // levensthein and substring checking only kick in at three characters + query: '加法A', + others: [ { name: "中文名称的加法API", path: "non_english_identifier", href: "../non_english_identifier/fn.中文名称的加法API.html", }], - in_args: [{ - name: "加上", - path: "non_english_identifier::加法", - href: "../non_english_identifier/trait.加法.html#tymethod.加上", - }], + in_args: [], returned: [], }, { // Extensive type-based search is still buggy, experimental & work-in-progress. diff --git a/tests/rustdoc-js/path-maxeditdistance.js b/tests/rustdoc-js/path-maxeditdistance.js index 73b24a6dddf..cf700193ac4 100644 --- a/tests/rustdoc-js/path-maxeditdistance.js +++ b/tests/rustdoc-js/path-maxeditdistance.js @@ -14,21 +14,38 @@ const EXPECTED = [ ], }, { - // swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3) + // swap br/rb; that's edit distance 1, where maxPathEditDistance = 2 'query': 'arbacadarba::hocuspocusprestidigitation', 'others': [ { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, ], }, { - // truncate 5 chars, where maxEditDistance = 7 (21 / 3) - 'query': 'abracadarba::hocusprestidigitation', + // swap p/o o/p, that's also edit distance 1 + 'query': 'abracadabra::hocusopcusprestidigitation', 'others': [ { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, ], }, { - // truncate 9 chars, where maxEditDistance = 5 (17 / 3) + // swap p/o o/p and gi/ig, that's edit distance 2 + 'query': 'abracadabra::hocusopcusprestidiigtation', + 'others': [ + { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, + ], + }, + { + // swap p/o o/p, gi/ig, and ti/it, that's edit distance 3 and not shown (we stop at 2) + 'query': 'abracadabra::hocusopcusprestidiigtaiton', + 'others': [], + }, + { + // truncate 5 chars, where maxEditDistance = 2 + 'query': 'abracadarba::hocusprestidigitation', + 'others': [], + }, + { + // truncate 9 chars, where maxEditDistance = 2 'query': 'abracadarba::hprestidigitation', 'others': [], }, diff --git a/tests/rustdoc-js/prototype.js b/tests/rustdoc-js/prototype.js index da72fdce3db..0862acd8ffa 100644 --- a/tests/rustdoc-js/prototype.js +++ b/tests/rustdoc-js/prototype.js @@ -9,7 +9,9 @@ const EXPECTED = [ }, { 'query': '__proto__', - 'others': [], + 'others': [ + {"path": "", "name": "prototype"}, + ], 'returned': [], 'in_args': [], }, |
