about summary refs log tree commit diff
path: root/src/librustdoc/html/escape.rs
blob: ac9e2f42cc6d0d59ef3a1bac9df1d793afc5cbca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
//! HTML escaping.
//!
//! This module contains one unit struct, which can be used to HTML-escape a
//! string of text (for use in a format string).

use std::fmt;

use pulldown_cmark_escape::FmtWriter;
use unicode_segmentation::UnicodeSegmentation;

/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
pub(crate) struct Escape<'a>(pub &'a str);

impl fmt::Display for Escape<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        pulldown_cmark_escape::escape_html(FmtWriter(fmt), self.0)
    }
}

/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string.
///
/// This is only safe to use for text nodes. If you need your output to be
/// safely contained in an attribute, use [`Escape`]. If you don't know the
/// difference, use [`Escape`].
pub(crate) struct EscapeBodyText<'a>(pub &'a str);

impl fmt::Display for EscapeBodyText<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        pulldown_cmark_escape::escape_html_body_text(FmtWriter(fmt), self.0)
    }
}

/// Wrapper struct which will emit the HTML-escaped version of the contained
/// string when passed to a format string. This function also word-breaks
/// CamelCase and snake_case word names.
///
/// This is only safe to use for text nodes. If you need your output to be
/// safely contained in an attribute, use [`Escape`]. If you don't know the
/// difference, use [`Escape`].
pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str);

impl fmt::Display for EscapeBodyTextWithWbr<'_> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        let EscapeBodyTextWithWbr(text) = *self;
        if text.len() < 8 {
            return EscapeBodyText(text).fmt(fmt);
        }
        let mut last = 0;
        let mut it = text.grapheme_indices(true).peekable();
        let _ = it.next(); // don't insert wbr before first char
        while let Some((i, s)) = it.next() {
            let pk = it.peek();
            if s.chars().all(|c| c.is_whitespace()) {
                // don't need "First <wbr>Second"; the space is enough
                EscapeBodyText(&text[last..i]).fmt(fmt)?;
                last = i;
                continue;
            }
            let is_uppercase = || s.chars().any(|c| c.is_uppercase());
            let next_is_uppercase = || pk.is_none_or(|(_, t)| t.chars().any(|c| c.is_uppercase()));
            let next_is_underscore = || pk.is_none_or(|(_, t)| t.contains('_'));
            let next_is_colon = || pk.is_none_or(|(_, t)| t.contains(':'));
            // Check for CamelCase.
            //
            // `i - last > 3` avoids turning FmRadio into Fm<wbr>Radio, which is technically
            // correct, but needlessly bloated.
            //
            // is_uppercase && !next_is_uppercase checks for camelCase. HTTPSProxy,
            // for example, should become HTTPS<wbr>Proxy.
            //
            // !next_is_underscore avoids turning TEST_RUN into TEST<wbr>_<wbr>RUN, which is also
            // needlessly bloated.
            if i - last > 3 && is_uppercase() && !next_is_uppercase() && !next_is_underscore() {
                EscapeBodyText(&text[last..i]).fmt(fmt)?;
                fmt.write_str("<wbr>")?;
                last = i;
            } else if (s.contains(':') && !next_is_colon())
                || (s.contains('_') && !next_is_underscore())
            {
                EscapeBodyText(&text[last..i + 1]).fmt(fmt)?;
                fmt.write_str("<wbr>")?;
                last = i + 1;
            }
        }
        if last < text.len() {
            EscapeBodyText(&text[last..]).fmt(fmt)?;
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests;