about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-02-09 20:23:00 +0000
committerbors <bors@rust-lang.org>2023-02-09 20:23:00 +0000
commit8996ea93b6e554148c4286e62b613f12a3ee505c (patch)
tree1e54719a1d3b2d503d76ad2c7f4bc77d6c51ff45 /src
parent8cca42a47f5d574c8f7302c98c3f918cdc772fbb (diff)
parentb080a1a4fc68bc6150a281f21012b79b9f1ee57f (diff)
downloadrust-8996ea93b6e554148c4286e62b613f12a3ee505c.tar.gz
rust-8996ea93b6e554148c4286e62b613f12a3ee505c.zip
Auto merge of #107853 - Dylan-DPC:rollup-macf1qo, r=Dylan-DPC
Rollup of 6 pull requests

Successful merges:

 - #107648 (unused-lifetimes: don't warn about lifetimes originating from expanded code)
 - #107655 (rustdoc: use the same URL escape rules for fragments as for examples)
 - #107659 (test: snapshot for derive suggestion in diff files)
 - #107786 (Implement some tweaks in the new solver)
 - #107803 (Do not bring trait alias supertraits into scope)
 - #107815 (Disqualify `auto trait` built-in impl in new solver if explicit `impl` exists)

Failed merges:

r? `@ghost`
`@rustbot` modify labels: rollup
Diffstat (limited to 'src')
-rw-r--r--src/librustdoc/html/markdown.rs43
-rw-r--r--src/librustdoc/html/render/mod.rs75
2 files changed, 54 insertions, 64 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index fb7c34118a4..94de93e7a99 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -46,6 +46,7 @@ use crate::html::escape::Escape;
 use crate::html::format::Buffer;
 use crate::html::highlight;
 use crate::html::length_limit::HtmlWithLimit;
+use crate::html::render::small_url_encode;
 use crate::html::toc::TocBuilder;
 
 use pulldown_cmark::{
@@ -294,47 +295,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
                 doctest::make_test(&test, krate, false, &Default::default(), edition, None);
             let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
 
-            // These characters don't need to be escaped in a URI.
-            // See https://url.spec.whatwg.org/#query-percent-encode-set
-            // and https://url.spec.whatwg.org/#urlencoded-parsing
-            // and https://url.spec.whatwg.org/#url-code-points
-            fn dont_escape(c: u8) -> bool {
-                (b'a' <= c && c <= b'z')
-                    || (b'A' <= c && c <= b'Z')
-                    || (b'0' <= c && c <= b'9')
-                    || c == b'-'
-                    || c == b'_'
-                    || c == b'.'
-                    || c == b','
-                    || c == b'~'
-                    || c == b'!'
-                    || c == b'\''
-                    || c == b'('
-                    || c == b')'
-                    || c == b'*'
-                    || c == b'/'
-                    || c == b';'
-                    || c == b':'
-                    || c == b'?'
-                    // As described in urlencoded-parsing, the
-                    // first `=` is the one that separates key from
-                    // value. Following `=`s are part of the value.
-                    || c == b'='
-            }
-            let mut test_escaped = String::new();
-            for b in test.bytes() {
-                if dont_escape(b) {
-                    test_escaped.push(char::from(b));
-                } else if b == b' ' {
-                    // URL queries are decoded with + replaced with SP
-                    test_escaped.push('+');
-                } else if b == b'%' {
-                    test_escaped.push('%');
-                    test_escaped.push('%');
-                } else {
-                    write!(test_escaped, "%{:02X}", b).unwrap();
-                }
-            }
+            let test_escaped = small_url_encode(test);
             Some(format!(
                 r#"<a class="test-arrow" target="_blank" href="{}?code={}{}&amp;edition={}">Run</a>"#,
                 url, test_escaped, channel, edition,
diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs
index 816a8f4e274..fa22c461205 100644
--- a/src/librustdoc/html/render/mod.rs
+++ b/src/librustdoc/html/render/mod.rs
@@ -38,7 +38,7 @@ pub(crate) use self::span_map::{collect_spans_and_sources, LinkFromSrc};
 
 use std::collections::VecDeque;
 use std::default::Default;
-use std::fmt;
+use std::fmt::{self, Write};
 use std::fs;
 use std::iter::Peekable;
 use std::path::PathBuf;
@@ -2020,31 +2020,60 @@ fn get_associated_constants(
         .collect::<Vec<_>>()
 }
 
-// The point is to url encode any potential character from a type with genericity.
-fn small_url_encode(s: String) -> String {
+pub(crate) fn small_url_encode(s: String) -> String {
+    // These characters don't need to be escaped in a URI.
+    // See https://url.spec.whatwg.org/#query-percent-encode-set
+    // and https://url.spec.whatwg.org/#urlencoded-parsing
+    // and https://url.spec.whatwg.org/#url-code-points
+    fn dont_escape(c: u8) -> bool {
+        (b'a' <= c && c <= b'z')
+            || (b'A' <= c && c <= b'Z')
+            || (b'0' <= c && c <= b'9')
+            || c == b'-'
+            || c == b'_'
+            || c == b'.'
+            || c == b','
+            || c == b'~'
+            || c == b'!'
+            || c == b'\''
+            || c == b'('
+            || c == b')'
+            || c == b'*'
+            || c == b'/'
+            || c == b';'
+            || c == b':'
+            || c == b'?'
+            // As described in urlencoded-parsing, the
+            // first `=` is the one that separates key from
+            // value. Following `=`s are part of the value.
+            || c == b'='
+    }
     let mut st = String::new();
     let mut last_match = 0;
-    for (idx, c) in s.char_indices() {
-        let escaped = match c {
-            '<' => "%3C",
-            '>' => "%3E",
-            ' ' => "%20",
-            '?' => "%3F",
-            '\'' => "%27",
-            '&' => "%26",
-            ',' => "%2C",
-            ':' => "%3A",
-            ';' => "%3B",
-            '[' => "%5B",
-            ']' => "%5D",
-            '"' => "%22",
-            _ => continue,
-        };
+    for (idx, b) in s.bytes().enumerate() {
+        if dont_escape(b) {
+            continue;
+        }
 
-        st += &s[last_match..idx];
-        st += escaped;
-        // NOTE: we only expect single byte characters here - which is fine as long as we
-        // only match single byte characters
+        if last_match != idx {
+            // Invariant: `idx` must be the first byte in a character at this point.
+            st += &s[last_match..idx];
+        }
+        if b == b' ' {
+            // URL queries are decoded with + replaced with SP.
+            // While the same is not true for hashes, rustdoc only needs to be
+            // consistent with itself when encoding them.
+            st += "+";
+        } else if b == b'%' {
+            st += "%%";
+        } else {
+            write!(st, "%{:02X}", b).unwrap();
+        }
+        // Invariant: if the current byte is not at the start of a multi-byte character,
+        // we need to get down here so that when the next turn of the loop comes around,
+        // last_match winds up equalling idx.
+        //
+        // In other words, dont_escape must always return `false` in multi-byte character.
         last_match = idx + 1;
     }