diff options
| author | bors <bors@rust-lang.org> | 2023-02-09 20:23:00 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-02-09 20:23:00 +0000 |
| commit | 8996ea93b6e554148c4286e62b613f12a3ee505c (patch) | |
| tree | 1e54719a1d3b2d503d76ad2c7f4bc77d6c51ff45 /src | |
| parent | 8cca42a47f5d574c8f7302c98c3f918cdc772fbb (diff) | |
| parent | b080a1a4fc68bc6150a281f21012b79b9f1ee57f (diff) | |
| download | rust-8996ea93b6e554148c4286e62b613f12a3ee505c.tar.gz rust-8996ea93b6e554148c4286e62b613f12a3ee505c.zip | |
Auto merge of #107853 - Dylan-DPC:rollup-macf1qo, r=Dylan-DPC
Rollup of 6 pull requests Successful merges: - #107648 (unused-lifetimes: don't warn about lifetimes originating from expanded code) - #107655 (rustdoc: use the same URL escape rules for fragments as for examples) - #107659 (test: snapshot for derive suggestion in diff files) - #107786 (Implement some tweaks in the new solver) - #107803 (Do not bring trait alias supertraits into scope) - #107815 (Disqualify `auto trait` built-in impl in new solver if explicit `impl` exists) Failed merges: r? `@ghost` `@rustbot` modify labels: rollup
Diffstat (limited to 'src')
| -rw-r--r-- | src/librustdoc/html/markdown.rs | 43 | ||||
| -rw-r--r-- | src/librustdoc/html/render/mod.rs | 75 |
2 files changed, 54 insertions, 64 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index fb7c34118a4..94de93e7a99 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -46,6 +46,7 @@ use crate::html::escape::Escape; use crate::html::format::Buffer; use crate::html::highlight; use crate::html::length_limit::HtmlWithLimit; +use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ @@ -294,47 +295,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> { doctest::make_test(&test, krate, false, &Default::default(), edition, None); let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" }; - // These characters don't need to be escaped in a URI. - // See https://url.spec.whatwg.org/#query-percent-encode-set - // and https://url.spec.whatwg.org/#urlencoded-parsing - // and https://url.spec.whatwg.org/#url-code-points - fn dont_escape(c: u8) -> bool { - (b'a' <= c && c <= b'z') - || (b'A' <= c && c <= b'Z') - || (b'0' <= c && c <= b'9') - || c == b'-' - || c == b'_' - || c == b'.' - || c == b',' - || c == b'~' - || c == b'!' - || c == b'\'' - || c == b'(' - || c == b')' - || c == b'*' - || c == b'/' - || c == b';' - || c == b':' - || c == b'?' - // As described in urlencoded-parsing, the - // first `=` is the one that separates key from - // value. Following `=`s are part of the value. - || c == b'=' - } - let mut test_escaped = String::new(); - for b in test.bytes() { - if dont_escape(b) { - test_escaped.push(char::from(b)); - } else if b == b' ' { - // URL queries are decoded with + replaced with SP - test_escaped.push('+'); - } else if b == b'%' { - test_escaped.push('%'); - test_escaped.push('%'); - } else { - write!(test_escaped, "%{:02X}", b).unwrap(); - } - } + let test_escaped = small_url_encode(test); Some(format!( r#"<a class="test-arrow" target="_blank" href="{}?code={}{}&edition={}">Run</a>"#, url, test_escaped, channel, edition, diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 816a8f4e274..fa22c461205 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -38,7 +38,7 @@ pub(crate) use self::span_map::{collect_spans_and_sources, LinkFromSrc}; use std::collections::VecDeque; use std::default::Default; -use std::fmt; +use std::fmt::{self, Write}; use std::fs; use std::iter::Peekable; use std::path::PathBuf; @@ -2020,31 +2020,60 @@ fn get_associated_constants( .collect::<Vec<_>>() } -// The point is to url encode any potential character from a type with genericity. -fn small_url_encode(s: String) -> String { +pub(crate) fn small_url_encode(s: String) -> String { + // These characters don't need to be escaped in a URI. + // See https://url.spec.whatwg.org/#query-percent-encode-set + // and https://url.spec.whatwg.org/#urlencoded-parsing + // and https://url.spec.whatwg.org/#url-code-points + fn dont_escape(c: u8) -> bool { + (b'a' <= c && c <= b'z') + || (b'A' <= c && c <= b'Z') + || (b'0' <= c && c <= b'9') + || c == b'-' + || c == b'_' + || c == b'.' + || c == b',' + || c == b'~' + || c == b'!' + || c == b'\'' + || c == b'(' + || c == b')' + || c == b'*' + || c == b'/' + || c == b';' + || c == b':' + || c == b'?' + // As described in urlencoded-parsing, the + // first `=` is the one that separates key from + // value. Following `=`s are part of the value. + || c == b'=' + } let mut st = String::new(); let mut last_match = 0; - for (idx, c) in s.char_indices() { - let escaped = match c { - '<' => "%3C", - '>' => "%3E", - ' ' => "%20", - '?' => "%3F", - '\'' => "%27", - '&' => "%26", - ',' => "%2C", - ':' => "%3A", - ';' => "%3B", - '[' => "%5B", - ']' => "%5D", - '"' => "%22", - _ => continue, - }; + for (idx, b) in s.bytes().enumerate() { + if dont_escape(b) { + continue; + } - st += &s[last_match..idx]; - st += escaped; - // NOTE: we only expect single byte characters here - which is fine as long as we - // only match single byte characters + if last_match != idx { + // Invariant: `idx` must be the first byte in a character at this point. + st += &s[last_match..idx]; + } + if b == b' ' { + // URL queries are decoded with + replaced with SP. + // While the same is not true for hashes, rustdoc only needs to be + // consistent with itself when encoding them. + st += "+"; + } else if b == b'%' { + st += "%%"; + } else { + write!(st, "%{:02X}", b).unwrap(); + } + // Invariant: if the current byte is not at the start of a multi-byte character, + // we need to get down here so that when the next turn of the loop comes around, + // last_match winds up equalling idx. + // + // In other words, dont_escape must always return `false` in multi-byte character. last_match = idx + 1; } |
