about summary refs log tree commit diff
path: root/src/librustdoc/html
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2023-01-24 15:39:59 -0700
committerMichael Howell <michael@notriddle.com>2023-01-26 10:51:10 -0700
commit51df99f3c2bac3ee7158b115ff6d54b687d018e1 (patch)
tree846756fe9cebbb9d96234b6038e8fc4549cd7fc5 /src/librustdoc/html
parentc8e6a9e8b6251bbc8276cb78cabe1998deecbed7 (diff)
downloadrust-51df99f3c2bac3ee7158b115ff6d54b687d018e1.tar.gz
rust-51df99f3c2bac3ee7158b115ff6d54b687d018e1.zip
rustdoc: use smarter encoding for playground URL
The old way would compress okay with DEFLATE, but this version makes
uncompressed docs smaller, which matters for memory usage and stuff
like `cargo doc`.

Try it out: <https://play.rust-lang.org/?code=fn+main()+{%0Alet+mut+v+=+Vec::new();%0Av.push(1+/+1);%0Aprintln!(%22{}%22,+v[0]);%0A}>

In local testing, this change shrinks sample pages by anywhere between
4.0% and 0.031%

    $ du -b after.dir/std/vec/struct.Vec.html before.dir/std/vec/struct.Vec.html
    759235  after.dir/std/vec/struct.Vec.html
    781842  before.dir/std/vec/struct.Vec.html

100*((759235-781842)/781842)=-2.8

    $ du -b after.dir/std/num/struct.Wrapping.html before.dir/std/num/struct.Wrapping.html
    3194173 after.dir/std/num/struct.Wrapping.html
    3204351 before.dir/std/num/struct.Wrapping.html

100*((3194173-3204351)/3204351)=-0.031

    $ du -b after.dir/std/keyword.match.html before.dir/std/keyword.match.html
    8151    after.dir/std/keyword.match.html
    8495    before.dir/std/keyword.match.html

100*((8151-8495)/8495)=-4.0

Gzipped tarball sizes seem shrunk, but not by much.

    du -s before.tar.gz after.tar.gz
    69600   before.tar.gz
    69480   after.tar.gz

100*((69480-69600)/69600)=-0.17
Diffstat (limited to 'src/librustdoc/html')
-rw-r--r--src/librustdoc/html/markdown.rs19
1 files changed, 18 insertions, 1 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index 4ff67fe1551..b1efbf4bdca 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -296,7 +296,9 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
             let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
 
             // These characters don't need to be escaped in a URI.
-            // FIXME: use a library function for percent encoding.
+            // See https://url.spec.whatwg.org/#query-percent-encode-set
+            // and https://url.spec.whatwg.org/#urlencoded-parsing
+            // and https://url.spec.whatwg.org/#url-code-points
             fn dont_escape(c: u8) -> bool {
                 (b'a' <= c && c <= b'z')
                     || (b'A' <= c && c <= b'Z')
@@ -304,17 +306,32 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
                     || c == b'-'
                     || c == b'_'
                     || c == b'.'
+                    || c == b','
                     || c == b'~'
                     || c == b'!'
                     || c == b'\''
                     || c == b'('
                     || c == b')'
                     || c == b'*'
+                    || c == b'/'
+                    || c == b';'
+                    || c == b':'
+                    || c == b'?'
+                    // As described in urlencoded-parsing, the
+                    // first `=` is the one that separates key from
+                    // value. Following `=`s are part of the value.
+                    || c == b'='
             }
             let mut test_escaped = String::new();
             for b in test.bytes() {
                 if dont_escape(b) {
                     test_escaped.push(char::from(b));
+                } else if b == b' ' {
+                    // URL queries are decoded with + replaced with SP
+                    test_escaped.push('+');
+                } else if b == b'%' {
+                    test_escaped.push('%');
+                    test_escaped.push('%');
                 } else {
                     write!(test_escaped, "%{:02X}", b).unwrap();
                 }