about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2023-12-01 11:04:34 -0700
committerMichael Howell <michael@notriddle.com>2023-12-01 15:15:45 -0700
commit6da2b7dd8f9e32fe94969ebcf8f8745c2b2108aa (patch)
treeedf20a463e5a6e0e889f009a6fb3903ae61f9f55
parent7230f6c5c51d8668a16d92da9f49d442521de828 (diff)
downloadrust-6da2b7dd8f9e32fe94969ebcf8f8745c2b2108aa.tar.gz
rust-6da2b7dd8f9e32fe94969ebcf8f8745c2b2108aa.zip
rustdoc: do not escape quotes in body text
Escaping quote marks is only needed in attributes, not text.

```console
$ du -hs doc-old/ doc-new/
670M    doc-old/
669M    doc-new/
```
-rw-r--r--src/librustdoc/html/escape.rs36
-rw-r--r--src/librustdoc/html/highlight.rs12
-rw-r--r--src/librustdoc/html/highlight/fixtures/dos_line.html2
-rw-r--r--src/librustdoc/html/highlight/fixtures/sample.html8
4 files changed, 50 insertions, 8 deletions
diff --git a/src/librustdoc/html/escape.rs b/src/librustdoc/html/escape.rs
index 4a19d0a44c3..ea4b573aeb9 100644
--- a/src/librustdoc/html/escape.rs
+++ b/src/librustdoc/html/escape.rs
@@ -38,3 +38,39 @@ impl<'a> fmt::Display for Escape<'a> {
         Ok(())
     }
 }
+
+/// Wrapper struct which will emit the HTML-escaped version of the contained
+/// string when passed to a format string.
+///
+/// This is only safe to use for text nodes. If you need your output to be
+/// safely contained in an attribute, use [`Escape`]. If you don't know the
+/// difference, use [`Escape`].
+pub(crate) struct EscapeBodyText<'a>(pub &'a str);
+
+impl<'a> fmt::Display for EscapeBodyText<'a> {
+    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Because the internet is always right, turns out there's not that many
+        // characters to escape: http://stackoverflow.com/questions/7381974
+        let EscapeBodyText(s) = *self;
+        let pile_o_bits = s;
+        let mut last = 0;
+        for (i, ch) in s.char_indices() {
+            let s = match ch {
+                '>' => "&gt;",
+                '<' => "&lt;",
+                '&' => "&amp;",
+                _ => continue,
+            };
+            fmt.write_str(&pile_o_bits[last..i])?;
+            fmt.write_str(s)?;
+            // NOTE: we only expect single byte characters here - which is fine as long as we
+            // only match single byte characters
+            last = i + 1;
+        }
+
+        if last < s.len() {
+            fmt.write_str(&pile_o_bits[last..])?;
+        }
+        Ok(())
+    }
+}
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index b762c8a1ce6..1cdc792a819 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -6,7 +6,7 @@
 //! Use the `render_with_highlighting` to highlight some rust code.
 
 use crate::clean::PrimitiveType;
-use crate::html::escape::Escape;
+use crate::html::escape::EscapeBodyText;
 use crate::html::render::{Context, LinkFromSrc};
 
 use std::collections::VecDeque;
@@ -189,7 +189,7 @@ impl<'a, 'tcx, F: Write> TokenHandler<'a, 'tcx, F> {
             && can_merge(current_class, Some(*parent_class), "")
         {
             for (text, class) in self.pending_elems.iter() {
-                string(self.out, Escape(text), *class, &self.href_context, false);
+                string(self.out, EscapeBodyText(text), *class, &self.href_context, false);
             }
         } else {
             // We only want to "open" the tag ourselves if we have more than one pending and if the
@@ -202,7 +202,13 @@ impl<'a, 'tcx, F: Write> TokenHandler<'a, 'tcx, F> {
                 None
             };
             for (text, class) in self.pending_elems.iter() {
-                string(self.out, Escape(text), *class, &self.href_context, close_tag.is_none());
+                string(
+                    self.out,
+                    EscapeBodyText(text),
+                    *class,
+                    &self.href_context,
+                    close_tag.is_none(),
+                );
             }
             if let Some(close_tag) = close_tag {
                 exit_span(self.out, close_tag);
diff --git a/src/librustdoc/html/highlight/fixtures/dos_line.html b/src/librustdoc/html/highlight/fixtures/dos_line.html
index 30b50ca7c66..b98e6712590 100644
--- a/src/librustdoc/html/highlight/fixtures/dos_line.html
+++ b/src/librustdoc/html/highlight/fixtures/dos_line.html
@@ -1,3 +1,3 @@
 <span class="kw">pub fn </span>foo() {
-<span class="macro">println!</span>(<span class="string">&quot;foo&quot;</span>);
+<span class="macro">println!</span>(<span class="string">"foo"</span>);
 }
diff --git a/src/librustdoc/html/highlight/fixtures/sample.html b/src/librustdoc/html/highlight/fixtures/sample.html
index fced2eacd9e..aa735e81597 100644
--- a/src/librustdoc/html/highlight/fixtures/sample.html
+++ b/src/librustdoc/html/highlight/fixtures/sample.html
@@ -8,12 +8,12 @@
 .lifetime { color: #B76514; }
 .question-mark { color: #ff9011; }
 </style>
-<pre><code><span class="attr">#![crate_type = <span class="string">&quot;lib&quot;</span>]
+<pre><code><span class="attr">#![crate_type = <span class="string">"lib"</span>]
 
 </span><span class="kw">use </span>std::path::{Path, PathBuf};
 
-<span class="attr">#[cfg(target_os = <span class="string">&quot;linux&quot;</span>)]
-#[cfg(target_os = <span class="string">&quot;windows&quot;</span>)]
+<span class="attr">#[cfg(target_os = <span class="string">"linux"</span>)]
+#[cfg(target_os = <span class="string">"windows"</span>)]
 </span><span class="kw">fn </span>main() -&gt; () {
     <span class="kw">let </span>foo = <span class="bool-val">true </span>&amp;&amp; <span class="bool-val">false </span>|| <span class="bool-val">true</span>;
     <span class="kw">let _</span>: <span class="kw-2">*const </span>() = <span class="number">0</span>;
@@ -22,7 +22,7 @@
     <span class="kw">let _ </span>= <span class="kw-2">*</span>foo;
     <span class="macro">mac!</span>(foo, <span class="kw-2">&amp;mut </span>bar);
     <span class="macro">assert!</span>(<span class="self">self</span>.length &lt; N &amp;&amp; index &lt;= <span class="self">self</span>.length);
-    ::std::env::var(<span class="string">&quot;gateau&quot;</span>).is_ok();
+    ::std::env::var(<span class="string">"gateau"</span>).is_ok();
     <span class="attr">#[rustfmt::skip]
     </span><span class="kw">let </span>s:std::path::PathBuf = std::path::PathBuf::new();
     <span class="kw">let </span><span class="kw-2">mut </span>s = String::new();