diff options
| author | Stuart Cook <Zalathar@users.noreply.github.com> | 2025-08-26 14:19:16 +1000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-26 14:19:16 +1000 |
| commit | aecc0287efcb91105bfd4751fc5a4e1873348758 (patch) | |
| tree | 5d5d962bce08c66eb17b3900b61c3ffd0995bf2e | |
| parent | e011dd47ee04cd1e62786b5a0b3bfe2d5e58ae35 (diff) | |
| parent | d022089f58fa6bf8f4f0bb020640836eb10eae7a (diff) | |
| download | rust-aecc0287efcb91105bfd4751fc5a4e1873348758.tar.gz rust-aecc0287efcb91105bfd4751fc5a4e1873348758.zip | |
Rollup merge of #145535 - lolbinarycat:rustdoc-invalid_html_tags-svg-145529, r=GuillaumeGomez
make rustdoc::invalid_html_tags more robust. Best reviewed a commit at a time. I kept finding more edge cases, so I ended up having to make quite significant changes to the parser in order to make it preserve state across events and handle multiline attributes correctly. Fixes rust-lang/rust#145529.
| -rw-r--r-- | src/librustdoc/lib.rs | 1 | ||||
| -rw-r--r-- | src/librustdoc/passes/lint/html_tags.rs | 476 | ||||
| -rw-r--r-- | src/librustdoc/passes/lint/html_tags/tests.rs | 73 | ||||
| -rw-r--r-- | tests/rustdoc-ui/lints/invalid-html-tags.rs | 94 | ||||
| -rw-r--r-- | tests/rustdoc-ui/lints/invalid-html-tags.stderr | 71 |
5 files changed, 534 insertions, 181 deletions
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index d891d1fba25..e2682045ab4 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -11,6 +11,7 @@ #![feature(file_buffered)] #![feature(format_args_nl)] #![feature(if_let_guard)] +#![feature(iter_advance_by)] #![feature(iter_intersperse)] #![feature(round_char_boundary)] #![feature(rustc_private)] diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index 19cf15d40a3..da09117b1bb 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -1,9 +1,11 @@ //! Detects invalid HTML (like an unclosed `<span>`) in doc comments. +use std::borrow::Cow; use std::iter::Peekable; use std::ops::Range; use std::str::CharIndices; +use itertools::Itertools as _; use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd}; use rustc_hir::HirId; use rustc_resolve::rustdoc::source_span_for_markdown_range; @@ -101,7 +103,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & }); }; - let mut tags = Vec::new(); + let mut tagp = TagParser::new(); let mut is_in_comment = None; let mut in_code_block = false; @@ -126,70 +128,65 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & }; let p = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer)) - .into_offset_iter(); + .into_offset_iter() + .coalesce(|a, b| { + // for some reason, pulldown-cmark splits html blocks into separate events for each line. + // we undo this, in order to handle multi-line tags. 
+ match (a, b) { + ((Event::Html(_), ra), (Event::Html(_), rb)) if ra.end == rb.start => { + let merged = ra.start..rb.end; + Ok((Event::Html(Cow::Borrowed(&dox[merged.clone()]).into()), merged)) + } + x => Err(x), + } + }); for (event, range) in p { match event { Event::Start(Tag::CodeBlock(_)) => in_code_block = true, Event::Html(text) | Event::InlineHtml(text) if !in_code_block => { - extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) + tagp.extract_tags(&text, range, &mut is_in_comment, &report_diag) } Event::End(TagEnd::CodeBlock) => in_code_block = false, _ => {} } } - for (tag, range) in tags.iter().filter(|(t, _)| { - let t = t.to_lowercase(); - !ALLOWED_UNCLOSED.contains(&t.as_str()) - }) { - report_diag(format!("unclosed HTML tag `{tag}`"), range, true); - } - if let Some(range) = is_in_comment { report_diag("Unclosed HTML comment".to_string(), &range, false); + } else if let &Some(quote_pos) = &tagp.quote_pos { + let qr = Range { start: quote_pos, end: quote_pos }; + report_diag( + format!("unclosed quoted HTML attribute on tag `{}`", &tagp.tag_name), + &qr, + false, + ); + } else { + if !tagp.tag_name.is_empty() { + report_diag( + format!("incomplete HTML tag `{}`", &tagp.tag_name), + &(tagp.tag_start_pos..dox.len()), + false, + ); + } + for (tag, range) in tagp.tags.iter().filter(|(t, _)| { + let t = t.to_lowercase(); + !is_implicitly_self_closing(&t) + }) { + report_diag(format!("unclosed HTML tag `{tag}`"), range, true); + } } } +/// These tags are interpreted as self-closing if they lack an explicit closing tag. 
const ALLOWED_UNCLOSED: &[&str] = &[ "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr", ]; -fn drop_tag( - tags: &mut Vec<(String, Range<usize>)>, - tag_name: String, - range: Range<usize>, - f: &impl Fn(String, &Range<usize>, bool), -) { - let tag_name_low = tag_name.to_lowercase(); - if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { - // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should - // be emitted. - let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| { - let at = at.to_lowercase(); - at == "script" || at == "style" - }); - for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) { - if should_not_warn { - continue; - } - let last_tag_name_low = last_tag_name.to_lowercase(); - if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) { - continue; - } - // `tags` is used as a queue, meaning that everything after `pos` is included inside it. - // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still - // have `h3`, meaning the tag wasn't closed as it should have. - f(format!("unclosed HTML tag `{last_tag_name}`"), &last_tag_span, true); - } - // Remove the `tag_name` that was originally closed - tags.pop(); - } else { - // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required - // but it helps for the visualization). - f(format!("unopened HTML tag `{tag_name}`"), &range, false); - } +/// Allows constructs like `<img>`, but not `<img`. 
+fn is_implicitly_self_closing(tag_name: &str) -> bool { + ALLOWED_UNCLOSED.contains(&tag_name) } fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> { @@ -252,151 +249,292 @@ fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool { c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit()) } -fn extract_html_tag( - tags: &mut Vec<(String, Range<usize>)>, - text: &str, - range: &Range<usize>, - start_pos: usize, - iter: &mut Peekable<CharIndices<'_>>, - f: &impl Fn(String, &Range<usize>, bool), -) { - let mut tag_name = String::new(); - let mut is_closing = false; - let mut prev_pos = start_pos; +/// Parse html tags to ensure they are well-formed +#[derive(Debug, Clone)] +struct TagParser { + tags: Vec<(String, Range<usize>)>, + /// Name of the tag that is being parsed, if we are within a tag. + /// + /// Since the `<` and name of a tag must appear on the same line with no whitespace, + /// if this is the empty string, we are not in a tag. + tag_name: String, + tag_start_pos: usize, + is_closing: bool, + /// `true` if we are within a tag, but not within its name. + in_attrs: bool, + /// If we are in a quoted attribute, what quote char does it use? + /// + /// This needs to be stored in the struct since HTML5 allows newlines in quoted attrs. + quote: Option<char>, + quote_pos: Option<usize>, + after_eq: bool, +} - loop { - let (pos, c) = match iter.peek() { - Some((pos, c)) => (*pos, *c), - // In case we reached the of the doc comment, we want to check that it's an - // unclosed HTML tag. For example "/// <h3". - None => (prev_pos, '\0'), - }; - prev_pos = pos; - // Checking if this is a closing tag (like `</a>` for `<a>`). 
- if c == '/' && tag_name.is_empty() { - is_closing = true; - } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) { - tag_name.push(c); - } else { - if !tag_name.is_empty() { - let mut r = Range { start: range.start + start_pos, end: range.start + pos }; - if c == '>' { - // In case we have a tag without attribute, we can consider the span to - // refer to it fully. - r.end += 1; +impl TagParser { + fn new() -> Self { + Self { + tags: Vec::new(), + tag_name: String::with_capacity(8), + tag_start_pos: 0, + is_closing: false, + in_attrs: false, + quote: None, + quote_pos: None, + after_eq: false, + } + } + + fn drop_tag(&mut self, range: Range<usize>, f: &impl Fn(String, &Range<usize>, bool)) { + let tag_name_low = self.tag_name.to_lowercase(); + if let Some(pos) = self.tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { + // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should + // be emitted. + let should_not_warn = self.tags.iter().take(pos + 1).any(|(at, _)| { + let at = at.to_lowercase(); + at == "script" || at == "style" + }); + for (last_tag_name, last_tag_span) in self.tags.drain(pos + 1..) { + if should_not_warn { + continue; } - if is_closing { - // In case we have "</div >" or even "</div >". - if c != '>' { - if !c.is_whitespace() { - // It seems like it's not a valid HTML tag. - break; - } - let mut found = false; - for (new_pos, c) in text[pos..].char_indices() { + let last_tag_name_low = last_tag_name.to_lowercase(); + if is_implicitly_self_closing(&last_tag_name_low) { + continue; + } + // `tags` is used as a queue, meaning that everything after `pos` is included inside it. + // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still + // have `h3`, meaning the tag wasn't closed as it should have. 
+ f(format!("unclosed HTML tag `{last_tag_name}`"), &last_tag_span, true); + } + // Remove the `tag_name` that was originally closed + self.tags.pop(); + } else { + // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required + // but it helps for the visualization). + f(format!("unopened HTML tag `{}`", &self.tag_name), &range, false); + } + } + + /// Handle a `<` that appeared while parsing a tag. + fn handle_lt_in_tag( + &mut self, + range: Range<usize>, + lt_pos: usize, + f: &impl Fn(String, &Range<usize>, bool), + ) { + let global_pos = range.start + lt_pos; + // is this check needed? + if global_pos == self.tag_start_pos { + // `<` is in the tag because it is the start. + return; + } + // tried to start a new tag while in a tag + f( + format!("incomplete HTML tag `{}`", &self.tag_name), + &(self.tag_start_pos..global_pos), + false, + ); + self.tag_parsed(); + } + + fn extract_html_tag( + &mut self, + text: &str, + range: &Range<usize>, + start_pos: usize, + iter: &mut Peekable<CharIndices<'_>>, + f: &impl Fn(String, &Range<usize>, bool), + ) { + let mut prev_pos = start_pos; + + 'outer_loop: loop { + let (pos, c) = match iter.peek() { + Some((pos, c)) => (*pos, *c), + // In case we reached the of the doc comment, we want to check that it's an + // unclosed HTML tag. For example "/// <h3". + None if self.tag_name.is_empty() => (prev_pos, '\0'), + None => break, + }; + prev_pos = pos; + if c == '/' && self.tag_name.is_empty() { + // Checking if this is a closing tag (like `</a>` for `<a>`). + self.is_closing = true; + } else if !self.in_attrs && is_valid_for_html_tag_name(c, self.tag_name.is_empty()) { + self.tag_name.push(c); + } else { + if !self.tag_name.is_empty() { + self.in_attrs = true; + let mut r = Range { start: range.start + start_pos, end: range.start + pos }; + if c == '>' { + // In case we have a tag without attribute, we can consider the span to + // refer to it fully. 
+ r.end += 1; + } + if self.is_closing { + // In case we have "</div >" or even "</div >". + if c != '>' { if !c.is_whitespace() { - if c == '>' { - r.end = range.start + new_pos + 1; - found = true; - } + // It seems like it's not a valid HTML tag. break; } - } - if !found { - break; - } - } - drop_tag(tags, tag_name, r, f); - } else { - let mut is_self_closing = false; - let mut quote_pos = None; - if c != '>' { - let mut quote = None; - let mut after_eq = false; - for (i, c) in text[pos..].char_indices() { - if !c.is_whitespace() { - if let Some(q) = quote { - if c == q { - quote = None; - quote_pos = None; - after_eq = false; + let mut found = false; + for (new_pos, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + if c == '>' { + r.end = range.start + new_pos + 1; + found = true; + } else if c == '<' { + self.handle_lt_in_tag(range.clone(), pos + new_pos, f); } - } else if c == '>' { break; - } else if c == '/' && !after_eq { - is_self_closing = true; - } else { - if is_self_closing { - is_self_closing = false; - } - if (c == '"' || c == '\'') && after_eq { - quote = Some(c); - quote_pos = Some(pos + i); - } else if c == '=' { - after_eq = true; - } } - } else if quote.is_none() { - after_eq = false; + } + if !found { + break 'outer_loop; } } - } - if let Some(quote_pos) = quote_pos { - let qr = Range { start: quote_pos, end: quote_pos }; - f( - format!("unclosed quoted HTML attribute on tag `{tag_name}`"), - &qr, - false, - ); - } - if is_self_closing { - // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus - let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..]) - || tags.iter().take(pos + 1).any(|(at, _)| { - let at = at.to_lowercase(); - at == "svg" || at == "math" - }); - if !valid { - f(format!("invalid self-closing HTML tag `{tag_name}`"), &r, false); - } + self.drop_tag(r, f); + self.tag_parsed(); } else { - tags.push((tag_name, r)); + self.extract_opening_tag(text, range, r, pos, c, iter, f) } 
} + break; } - break; + iter.next(); } - iter.next(); } -} - -fn extract_tags( - tags: &mut Vec<(String, Range<usize>)>, - text: &str, - range: Range<usize>, - is_in_comment: &mut Option<Range<usize>>, - f: &impl Fn(String, &Range<usize>, bool), -) { - let mut iter = text.char_indices().peekable(); - while let Some((start_pos, c)) = iter.next() { - if is_in_comment.is_some() { - if text[start_pos..].starts_with("-->") { - *is_in_comment = None; + fn extract_opening_tag( + &mut self, + text: &str, + range: &Range<usize>, + r: Range<usize>, + pos: usize, + c: char, + iter: &mut Peekable<CharIndices<'_>>, + f: &impl Fn(String, &Range<usize>, bool), + ) { + // we can store this as a local, since html5 does require the `/` and `>` + // to not be separated by whitespace. + let mut is_self_closing = false; + if c != '>' { + 'parse_til_gt: { + for (i, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + debug_assert_eq!(self.quote_pos.is_some(), self.quote.is_some()); + if let Some(q) = self.quote { + if c == q { + self.quote = None; + self.quote_pos = None; + self.after_eq = false; + } + } else if c == '>' { + break 'parse_til_gt; + } else if c == '<' { + self.handle_lt_in_tag(range.clone(), pos + i, f); + } else if c == '/' && !self.after_eq { + is_self_closing = true; + } else { + if is_self_closing { + is_self_closing = false; + } + if (c == '"' || c == '\'') && self.after_eq { + self.quote = Some(c); + self.quote_pos = Some(pos + i); + } else if c == '=' { + self.after_eq = true; + } + } + } else if self.quote.is_none() { + self.after_eq = false; + } + if !is_self_closing && !self.tag_name.is_empty() { + iter.next(); + } + } + // if we've run out of text but still haven't found a `>`, + // return early without calling `tag_parsed` or emitting lints. + // this allows us to either find the `>` in a later event + // or emit a lint about it being missing. + return; } - } else if c == '<' { - if text[start_pos..].starts_with("<!--") { - // We skip the "!--" part. 
(Once `advance_by` is stable, might be nice to use it!) - iter.next(); - iter.next(); - iter.next(); - *is_in_comment = Some(Range { - start: range.start + start_pos, - end: range.start + start_pos + 3, + } + if is_self_closing { + // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus + let valid = ALLOWED_UNCLOSED.contains(&&self.tag_name[..]) + || self.tags.iter().take(pos + 1).any(|(at, _)| { + let at = at.to_lowercase(); + at == "svg" || at == "math" }); - } else { - extract_html_tag(tags, text, &range, start_pos, &mut iter, f); + if !valid { + f(format!("invalid self-closing HTML tag `{}`", self.tag_name), &r, false); + } + } else if !self.tag_name.is_empty() { + self.tags.push((std::mem::take(&mut self.tag_name), r)); + } + self.tag_parsed(); + } + /// Finished parsing a tag, reset related data. + fn tag_parsed(&mut self) { + self.tag_name.clear(); + self.is_closing = false; + self.in_attrs = false; + } + + fn extract_tags( + &mut self, + text: &str, + range: Range<usize>, + is_in_comment: &mut Option<Range<usize>>, + f: &impl Fn(String, &Range<usize>, bool), + ) { + let mut iter = text.char_indices().peekable(); + let mut prev_pos = 0; + loop { + if self.quote.is_some() { + debug_assert!(self.in_attrs && self.quote_pos.is_some()); + } + if self.in_attrs + && let Some(&(start_pos, _)) = iter.peek() + { + self.extract_html_tag(text, &range, start_pos, &mut iter, f); + // if no progress is being made, move forward forcefully. + if prev_pos == start_pos { + iter.next(); + } + prev_pos = start_pos; + continue; + } + let Some((start_pos, c)) = iter.next() else { break }; + if is_in_comment.is_some() { + if text[start_pos..].starts_with("-->") { + *is_in_comment = None; + } + } else if c == '<' { + // "<!--" is a valid attribute name under html5, so don't treat it as a comment if we're in a tag. + if self.tag_name.is_empty() && text[start_pos..].starts_with("<!--") { + // We skip the "!--" part. 
(Once `advance_by` is stable, might be nice to use it!) + iter.next(); + iter.next(); + iter.next(); + *is_in_comment = Some(Range { + start: range.start + start_pos, + end: range.start + start_pos + 4, + }); + } else { + if self.tag_name.is_empty() { + self.tag_start_pos = range.start + start_pos; + } + self.extract_html_tag(text, &range, start_pos, &mut iter, f); + } + } else if !self.tag_name.is_empty() { + // partially inside html tag that spans across events + self.extract_html_tag(text, &range, start_pos, &mut iter, f); } } } } + +#[cfg(test)] +mod tests; diff --git a/src/librustdoc/passes/lint/html_tags/tests.rs b/src/librustdoc/passes/lint/html_tags/tests.rs new file mode 100644 index 00000000000..81c1d21a55d --- /dev/null +++ b/src/librustdoc/passes/lint/html_tags/tests.rs @@ -0,0 +1,73 @@ +use std::cell::RefCell; + +use super::*; + +#[test] +fn test_extract_tags_nested_unclosed() { + let mut tagp = TagParser::new(); + let diags = RefCell::new(Vec::new()); + let dox = "<div>\n<br</div>"; + tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| { + diags.borrow_mut().push((s, r.clone(), b)); + }); + assert_eq!(diags.borrow().len(), 1, "did not get expected diagnostics: {diags:?}"); + assert_eq!(diags.borrow()[0].1, 6..9) +} + +#[test] +fn test_extract_tags_taglike_in_attr() { + let mut tagp = TagParser::new(); + let diags = RefCell::new(Vec::new()); + let dox = "<img src='<div>'>"; + tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| { + diags.borrow_mut().push((s, r.clone(), b)); + }); + assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}"); +} + +#[test] +fn test_extract_tags_taglike_in_multiline_attr() { + let mut tagp = TagParser::new(); + let diags = RefCell::new(Vec::new()); + let dox = "<img src=\"\nasd\n<div>\n\">"; + tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| { + diags.borrow_mut().push((s, r.clone(), b)); + }); + assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}"); +} + +#[test] 
+fn test_extract_tags_taglike_in_multievent_attr() { + let mut tagp = TagParser::new(); + let diags = RefCell::new(Vec::new()); + let dox = "<img src='<div>'>"; + let split_point = 10; + let mut p = |range: Range<usize>| { + tagp.extract_tags(&dox[range.clone()], range, &mut None, &|s, r, b| { + diags.borrow_mut().push((s, r.clone(), b)); + }) + }; + p(0..split_point); + p(split_point..dox.len()); + assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}"); +} + +#[test] +fn test_extract_tags_taglike_in_multiline_multievent_attr() { + let mut tagp = TagParser::new(); + let diags = RefCell::new(Vec::new()); + let dox = "<img src='\n foo:\n </div>\n <p/>\n <div>\n'>"; + let mut p = |range: Range<usize>| { + tagp.extract_tags(&dox[range.clone()], range, &mut None, &|s, r, b| { + diags.borrow_mut().push((s, r.clone(), b)); + }) + }; + let mut offset = 0; + for ln in dox.split_inclusive('\n') { + let new_offset = offset + ln.len(); + p(offset..new_offset); + offset = new_offset; + } + assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}"); + assert_eq!(tagp.tags.len(), 1); +} diff --git a/tests/rustdoc-ui/lints/invalid-html-tags.rs b/tests/rustdoc-ui/lints/invalid-html-tags.rs index 317f1fd1d46..8003e5efdd5 100644 --- a/tests/rustdoc-ui/lints/invalid-html-tags.rs +++ b/tests/rustdoc-ui/lints/invalid-html-tags.rs @@ -43,7 +43,7 @@ pub fn b() {} /// <h3> //~^ ERROR unclosed HTML tag `h3` /// <script -//~^ ERROR unclosed HTML tag `script` +//~^ ERROR incomplete HTML tag `script` pub fn c() {} // Unclosed tags shouldn't warn if they are nested inside a <script> elem. @@ -72,6 +72,7 @@ pub fn e() {} /// <div></div > /// <div></div //~^ ERROR unclosed HTML tag `div` +//~| ERROR incomplete HTML tag `div` pub fn f() {} /// <!----> @@ -105,7 +106,7 @@ pub fn j() {} /// uiapp.run(&env::args().collect::<Vec<_>>()); /// ``` /// -/// <Vec<_> shouldn't warn! +// <Vec<_> shouldn't warn! 
/// `````` pub fn k() {} @@ -121,3 +122,92 @@ pub fn no_error_1() {} /// backslashed \<<a href=""> //~^ ERROR unclosed HTML tag `a` pub fn p() {} + +/// <svg width="512" height="512" viewBox="0 0 512" fill="none" xmlns="http://www.w3.org/2000/svg"> +/// <rect +/// width="256" +/// height="256" +/// fill="#5064C8" +/// stroke="black" +/// /> +/// </svg> +pub fn no_error_2() {} + +/// <div> +/// <img +/// src="https://example.com/ferris.png" +/// width="512" +/// height="512" +/// /> +/// </div> +pub fn no_error_3() {} + +/// > <div +/// > class="foo"> +/// > </div> +pub fn no_error_4() {} + +/// unfinished ALLOWED_UNCLOSED +/// +/// note: CommonMark doesn't allow an html block to start with a multiline tag, +/// so we use `<br>` a bunch to force these to be parsed as html blocks. +/// +/// <br> +/// <img +//~^ ERROR incomplete HTML tag `img` +pub fn q() {} + +/// nested unfinished ALLOWED_UNCLOSED +/// <p><img</p> +//~^ ERROR incomplete HTML tag `img` +pub fn r() {} + +/// > <br> +/// > <img +//~^ ERROR incomplete HTML tag `img` +/// > href="#broken" +pub fn s() {} + +/// <br> +/// <br<br> +//~^ ERROR incomplete HTML tag `br` +pub fn t() {} + +/// <br> +/// <br +//~^ ERROR incomplete HTML tag `br` +pub fn u() {} + +/// <a href=">" alt="<">html5 allows this</a> +pub fn no_error_5() {} + +/// <br> +/// <img title=" +/// html5 +/// allows +/// multiline +/// attr +/// values +/// these are just text, not tags: +/// </div> +/// <p/> +/// <div> +/// "> +pub fn no_error_6() {} + +/// <br> +/// <a href="data:text/html,<!DOCTYPE> +/// <html> +/// <body><b>this is allowed for some reason</b></body> +/// </html> +/// ">what</a> +pub fn no_error_7() {} + +/// Technically this is allowed per the html5 spec, +/// but there's basically no legitemate reason to do it, +/// so we don't allow it. 
+/// +/// <p <!-->foobar</p> +//~^ ERROR Unclosed HTML comment +//~| ERROR incomplete HTML tag `p` +pub fn v() {} diff --git a/tests/rustdoc-ui/lints/invalid-html-tags.stderr b/tests/rustdoc-ui/lints/invalid-html-tags.stderr index 9c2bfcf2c3d..b6ec22c2479 100644 --- a/tests/rustdoc-ui/lints/invalid-html-tags.stderr +++ b/tests/rustdoc-ui/lints/invalid-html-tags.stderr @@ -52,6 +52,12 @@ error: unclosed HTML tag `p` LL | /// <br/> <p> | ^^^ +error: incomplete HTML tag `script` + --> $DIR/invalid-html-tags.rs:45:5 + | +LL | /// <script + | ^^^^^^^ + error: unclosed HTML tag `div` --> $DIR/invalid-html-tags.rs:41:5 | @@ -64,11 +70,11 @@ error: unclosed HTML tag `h3` LL | /// <h3> | ^^^^ -error: unclosed HTML tag `script` - --> $DIR/invalid-html-tags.rs:45:5 +error: incomplete HTML tag `div` + --> $DIR/invalid-html-tags.rs:73:10 | -LL | /// <script - | ^^^^^^ +LL | /// <div></div + | ^^^^^ error: unclosed HTML tag `div` --> $DIR/invalid-html-tags.rs:73:5 @@ -77,28 +83,73 @@ LL | /// <div></div | ^^^^^ error: Unclosed HTML comment - --> $DIR/invalid-html-tags.rs:87:5 + --> $DIR/invalid-html-tags.rs:88:5 | LL | /// <!-- - | ^^^ + | ^^^^ error: unopened HTML tag `unopened-tag` - --> $DIR/invalid-html-tags.rs:114:26 + --> $DIR/invalid-html-tags.rs:115:26 | LL | /// Web Components style </unopened-tag> | ^^^^^^^^^^^^^^^ error: unclosed HTML tag `dashed-tags` - --> $DIR/invalid-html-tags.rs:112:26 + --> $DIR/invalid-html-tags.rs:113:26 | LL | /// Web Components style <dashed-tags> | ^^^^^^^^^^^^^ error: unclosed HTML tag `a` - --> $DIR/invalid-html-tags.rs:121:19 + --> $DIR/invalid-html-tags.rs:122:19 | LL | /// backslashed \<<a href=""> | ^^ -error: aborting due to 16 previous errors +error: incomplete HTML tag `img` + --> $DIR/invalid-html-tags.rs:156:5 + | +LL | /// <img + | ^^^^ + +error: incomplete HTML tag `img` + --> $DIR/invalid-html-tags.rs:161:8 + | +LL | /// <p><img</p> + | ^^^^ + +error: incomplete HTML tag `img` + --> $DIR/invalid-html-tags.rs:166:7 + | +LL | 
/// > <img + | _______^ +LL | | +LL | | /// > href="#broken" + | |____________________^ + +error: incomplete HTML tag `br` + --> $DIR/invalid-html-tags.rs:172:5 + | +LL | /// <br<br> + | ^^^ + +error: incomplete HTML tag `br` + --> $DIR/invalid-html-tags.rs:177:5 + | +LL | /// <br + | ^^^ + +error: incomplete HTML tag `p` + --> $DIR/invalid-html-tags.rs:210:5 + | +LL | /// <p <!-->foobar</p> + | ^^^ + +error: Unclosed HTML comment + --> $DIR/invalid-html-tags.rs:210:8 + | +LL | /// <p <!-->foobar</p> + | ^^^^ + +error: aborting due to 24 previous errors |
