//! HTML escaping. //! //! This module contains one unit struct, which can be used to HTML-escape a //! string of text (for use in a format string). use std::fmt; use unicode_segmentation::UnicodeSegmentation; /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. pub(crate) struct Escape<'a>(pub &'a str); impl<'a> fmt::Display for Escape<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { // Because the internet is always right, turns out there's not that many // characters to escape: http://stackoverflow.com/questions/7381974 let Escape(s) = *self; let pile_o_bits = s; let mut last = 0; for (i, ch) in s.char_indices() { let s = match ch { '>' => ">", '<' => "<", '&' => "&", '\'' => "'", '"' => """, _ => continue, }; fmt.write_str(&pile_o_bits[last..i])?; fmt.write_str(s)?; // NOTE: we only expect single byte characters here - which is fine as long as we // only match single byte characters last = i + 1; } if last < s.len() { fmt.write_str(&pile_o_bits[last..])?; } Ok(()) } } /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. /// /// This is only safe to use for text nodes. If you need your output to be /// safely contained in an attribute, use [`Escape`]. If you don't know the /// difference, use [`Escape`]. pub(crate) struct EscapeBodyText<'a>(pub &'a str); impl<'a> fmt::Display for EscapeBodyText<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { // Because the internet is always right, turns out there's not that many // characters to escape: http://stackoverflow.com/questions/7381974 let EscapeBodyText(s) = *self; let pile_o_bits = s; let mut last = 0; for (i, ch) in s.char_indices() { let s = match ch { '>' => ">", '<' => "<", '&' => "&", _ => continue, }; fmt.write_str(&pile_o_bits[last..i])?; fmt.write_str(s)?; // NOTE: we only expect single byte characters here - which is fine as long as we // only match single byte characters last = i + 1; } if last < s.len() { fmt.write_str(&pile_o_bits[last..])?; } Ok(()) } } /// Wrapper struct which will emit the HTML-escaped version of the contained /// string when passed to a format string. This function also word-breaks /// CamelCase and snake_case word names. /// /// This is only safe to use for text nodes. If you need your output to be /// safely contained in an attribute, use [`Escape`]. If you don't know the /// difference, use [`Escape`]. pub(crate) struct EscapeBodyTextWithWbr<'a>(pub &'a str); impl<'a> fmt::Display for EscapeBodyTextWithWbr<'a> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { let EscapeBodyTextWithWbr(text) = *self; if text.len() < 8 { return EscapeBodyText(text).fmt(fmt); } let mut last = 0; let mut it = text.grapheme_indices(true).peekable(); let _ = it.next(); // don't insert wbr before first char while let Some((i, s)) = it.next() { let pk = it.peek(); if s.chars().all(|c| c.is_whitespace()) { // don't need "First Second"; the space is enough EscapeBodyText(&text[last..i]).fmt(fmt)?; last = i; continue; } let is_uppercase = || s.chars().any(|c| c.is_uppercase()); let next_is_uppercase = || pk.map_or(true, |(_, t)| t.chars().any(|c| c.is_uppercase())); let next_is_underscore = || pk.map_or(true, |(_, t)| t.contains('_')); let next_is_colon = || pk.map_or(true, |(_, t)| t.contains(':')); if i - last > 3 && is_uppercase() && !next_is_uppercase() { EscapeBodyText(&text[last..i]).fmt(fmt)?; fmt.write_str("")?; last = i; } else if (s.contains(':') && !next_is_colon()) || (s.contains('_') && !next_is_underscore()) { EscapeBodyText(&text[last..i + 1]).fmt(fmt)?; fmt.write_str("")?; last = i + 1; } } if last < text.len() { EscapeBodyText(&text[last..]).fmt(fmt)?; } Ok(()) } } #[cfg(test)] mod tests;