about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorYuki Okushi <jtitor@2k36.org>2023-01-27 12:57:56 +0900
committerGitHub <noreply@github.com>2023-01-27 12:57:56 +0900
commitd68b5a42e2b481fa5ff43708e1ef820a6da13bd8 (patch)
tree2f228a68944cc40176ce53404d624a9e1664618c /src
parent5683915ca44f17e95d5049b943babbfdbca53961 (diff)
parent51df99f3c2bac3ee7158b115ff6d54b687d018e1 (diff)
downloadrust-d68b5a42e2b481fa5ff43708e1ef820a6da13bd8.tar.gz
rust-d68b5a42e2b481fa5ff43708e1ef820a6da13bd8.zip
Rollup merge of #107284 - notriddle:notriddle/plus, r=jsha
rustdoc: use smarter encoding for playground URL

The old way would compress okay with DEFLATE, but this version makes uncompressed docs smaller, which matters for memory usage and stuff like `cargo doc`.

Try it out: <https://play.rust-lang.org/?code=fn+main()+{%0Alet+mut+v+=+Vec::new();%0Av.push(1+/+1);%0Aprintln!(%22{}%22,+v[0]);%0A}>

In local testing, this change shrinks sample pages by anywhere between 4.0% and 0.031%

    $ du -b after.dir/std/vec/struct.Vec.html before.dir/std/vec/struct.Vec.html
    759235  after.dir/std/vec/struct.Vec.html
    781842  before.dir/std/vec/struct.Vec.html

100*((759235-781842)/781842)=-2.8

    $ du -b after.dir/std/num/struct.Wrapping.html before.dir/std/num/struct.Wrapping.html
    3194173 after.dir/std/num/struct.Wrapping.html
    3204351 before.dir/std/num/struct.Wrapping.html

100*((3194173-3204351)/3204351)=-0.031

    $ du -b after.dir/std/keyword.match.html before.dir/std/keyword.match.html
    8151    after.dir/std/keyword.match.html
    8495    before.dir/std/keyword.match.html

100*((8151-8495)/8495)=-4.0

Gzipped tarball sizes seem shrunk, but not by much.

    du -s before.tar.gz after.tar.gz
    69600   before.tar.gz
    69480   after.tar.gz

100*((69480-69600)/69600)=-0.17
Diffstat (limited to 'src')
-rw-r--r--src/librustdoc/html/markdown.rs19
1 files changed, 18 insertions, 1 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index 4ff67fe1551..b1efbf4bdca 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -296,7 +296,9 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
             let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
 
             // These characters don't need to be escaped in a URI.
-            // FIXME: use a library function for percent encoding.
+            // See https://url.spec.whatwg.org/#query-percent-encode-set
+            // and https://url.spec.whatwg.org/#urlencoded-parsing
+            // and https://url.spec.whatwg.org/#url-code-points
             fn dont_escape(c: u8) -> bool {
                 (b'a' <= c && c <= b'z')
                     || (b'A' <= c && c <= b'Z')
@@ -304,17 +306,32 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
                     || c == b'-'
                     || c == b'_'
                     || c == b'.'
+                    || c == b','
                     || c == b'~'
                     || c == b'!'
                     || c == b'\''
                     || c == b'('
                     || c == b')'
                     || c == b'*'
+                    || c == b'/'
+                    || c == b';'
+                    || c == b':'
+                    || c == b'?'
+                    // As described in urlencoded-parsing, the
+                    // first `=` is the one that separates key from
+                    // value. Following `=`s are part of the value.
+                    || c == b'='
             }
             let mut test_escaped = String::new();
             for b in test.bytes() {
                 if dont_escape(b) {
                     test_escaped.push(char::from(b));
+                } else if b == b' ' {
+                    // URL queries are decoded with + replaced with SP
+                    test_escaped.push('+');
+                } else if b == b'%' {
+                    test_escaped.push('%');
+                    test_escaped.push('%');
                 } else {
                     write!(test_escaped, "%{:02X}", b).unwrap();
                 }