diff options
| author | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2020-09-28 11:40:31 +0200 |
|---|---|---|
| committer | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2020-10-03 14:16:24 +0200 |
| commit | 5bc148957efc0bf4640d3d6b3e1824c00f1156e8 (patch) | |
| tree | b2b87b4e49c288f1bee17f3bdf08ef743e135746 | |
| parent | 30cabfd215f374b855f6d6473b00a55c3511c3e2 (diff) | |
| download | rust-5bc148957efc0bf4640d3d6b3e1824c00f1156e8.tar.gz rust-5bc148957efc0bf4640d3d6b3e1824c00f1156e8.zip | |
Correctly handle unicode characters and tags being open just before the end of the doc comment
| -rw-r--r-- | src/librustdoc/passes/html_tags.rs | 58 | ||||
| -rw-r--r-- | src/test/rustdoc-ui/invalid-html-tags.rs | 7 | ||||
| -rw-r--r-- | src/test/rustdoc-ui/invalid-html-tags.stderr | 36 |
3 files changed, 72 insertions, 29 deletions
diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs index 490e913fbde..f8869a41eb6 100644 --- a/src/librustdoc/passes/html_tags.rs +++ b/src/librustdoc/passes/html_tags.rs @@ -45,14 +45,22 @@ fn drop_tag( range: Range<usize>, f: &impl Fn(&str, &Range<usize>), ) { - if let Some(pos) = tags.iter().rev().position(|(t, _)| *t == tag_name) { + let tag_name_low = tag_name.to_lowercase(); + if let Some(pos) = tags.iter().rev().position(|(t, _)| t.to_lowercase() == tag_name_low) { // Because this is from a `rev` iterator, the position is reversed as well! let pos = tags.len() - 1 - pos; - // If the tag is nested inside a "<script>", not warning should be emitted. - let should_not_warn = - tags.iter().take(pos + 1).any(|(at, _)| at == "script" || at == "style"); + // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should + // be emitted. + let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| { + let at = at.to_lowercase(); + at == "script" || at == "style" + }); for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) { - if should_not_warn || ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name) { + if should_not_warn { + continue; + } + let last_tag_name_low = last_tag_name.to_lowercase(); + if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) { continue; } // `tags` is used as a queue, meaning that everything after `pos` is included inside it. @@ -77,21 +85,29 @@ fn extract_tag( ) { let mut iter = text.chars().enumerate().peekable(); - while let Some((start_pos, c)) = iter.next() { + 'top: while let Some((start_pos, c)) = iter.next() { if c == '<' { let mut tag_name = String::new(); let mut is_closing = false; - while let Some((pos, c)) = iter.peek() { + let mut prev_pos = start_pos; + loop { + let (pos, c) = match iter.peek() { + Some((pos, c)) => (*pos, *c), + // In case we reached the end of the doc comment, we want to check that it's an + // unclosed HTML tag. For example "/// <h3". 
+ None => (prev_pos, '\0'), + }; + prev_pos = pos; // Checking if this is a closing tag (like `</a>` for `<a>`). - if *c == '/' && tag_name.is_empty() { + if c == '/' && tag_name.is_empty() { is_closing = true; - } else if c.is_ascii_alphanumeric() && !c.is_ascii_uppercase() { - tag_name.push(*c); + } else if c.is_ascii_alphanumeric() { + tag_name.push(c); } else { if !tag_name.is_empty() { let mut r = Range { start: range.start + start_pos, end: range.start + pos }; - if *c == '>' { + if c == '>' { // In case we have a tag without attribute, we can consider the span to // refer to it fully. r.end += 1; @@ -102,11 +118,20 @@ fn extract_tag( tags.push((tag_name, r)); } } - break; + continue 'top; + } + // Some chars like 💩 are longer than 1 character, so we need to skip the other + // bytes as well to prevent stopping "in the middle" of a char. + for _ in 0..c.len_utf8() { + iter.next(); } - iter.next(); } } + // Some chars like 💩 are longer than 1 character, so we need to skip the other + // bytes as well to prevent stopping "in the middle" of a char. + for _ in 0..c.len_utf8() - 1 { + iter.next(); + } } } @@ -143,9 +168,10 @@ impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> { } } - for (tag, range) in - tags.iter().filter(|(t, _)| ALLOWED_UNCLOSED.iter().find(|&at| at == t).is_none()) - { + for (tag, range) in tags.iter().filter(|(t, _)| { + let t = t.to_lowercase(); + ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none() + }) { report_diag(&format!("unclosed HTML tag `{}`", tag), range); } } diff --git a/src/test/rustdoc-ui/invalid-html-tags.rs b/src/test/rustdoc-ui/invalid-html-tags.rs index 0dc2002bd39..a318b8ceca2 100644 --- a/src/test/rustdoc-ui/invalid-html-tags.rs +++ b/src/test/rustdoc-ui/invalid-html-tags.rs @@ -1,5 +1,8 @@ #![deny(invalid_html_tags)] +//! 
<p>💩<p> +//~^ ERROR unclosed HTML tag `p` + /// <img><input> /// <script> /// <img><input> @@ -38,6 +41,8 @@ pub fn b() {} //~^ ERROR unclosed HTML tag `div` /// <h3> //~^ ERROR unclosed HTML tag `h3` +/// <script +//~^ ERROR unclosed HTML tag `script` pub fn c() {} // Unclosed tags shouldn't warn if they are nested inside a <script> elem. @@ -55,7 +60,7 @@ pub fn d() {} /// <style> /// <h3><div> /// </style> -/// <style> +/// <stYle> /// <div> /// <p> /// </div> diff --git a/src/test/rustdoc-ui/invalid-html-tags.stderr b/src/test/rustdoc-ui/invalid-html-tags.stderr index d2e9704a227..70d577bbfa9 100644 --- a/src/test/rustdoc-ui/invalid-html-tags.stderr +++ b/src/test/rustdoc-ui/invalid-html-tags.stderr @@ -1,8 +1,8 @@ -error: unclosed HTML tag `unknown` - --> $DIR/invalid-html-tags.rs:7:5 +error: unclosed HTML tag `p` + --> $DIR/invalid-html-tags.rs:3:5 | -LL | /// <unknown> - | ^^^^^^^^^ +LL | //! <p>💩<p> + | ^^^ | note: the lint level is defined here --> $DIR/invalid-html-tags.rs:1:9 @@ -10,47 +10,59 @@ note: the lint level is defined here LL | #![deny(invalid_html_tags)] | ^^^^^^^^^^^^^^^^^ -error: unclosed HTML tag `script` +error: unclosed HTML tag `unknown` --> $DIR/invalid-html-tags.rs:10:5 | +LL | /// <unknown> + | ^^^^^^^^^ + +error: unclosed HTML tag `script` + --> $DIR/invalid-html-tags.rs:13:5 + | LL | /// <script> | ^^^^^^^^ error: unclosed HTML tag `h2` - --> $DIR/invalid-html-tags.rs:15:7 + --> $DIR/invalid-html-tags.rs:18:7 | LL | /// <h2> | ^^^^ error: unclosed HTML tag `h3` - --> $DIR/invalid-html-tags.rs:17:9 + --> $DIR/invalid-html-tags.rs:20:9 | LL | /// <h3> | ^^^^ error: unopened HTML tag `hello` - --> $DIR/invalid-html-tags.rs:20:5 + --> $DIR/invalid-html-tags.rs:23:5 | LL | /// </hello> | ^^^^^^^^ error: unclosed HTML tag `p` - --> $DIR/invalid-html-tags.rs:25:14 + --> $DIR/invalid-html-tags.rs:28:14 | LL | /// <br/> <p> | ^^^ error: unclosed HTML tag `div` - --> $DIR/invalid-html-tags.rs:37:5 + --> $DIR/invalid-html-tags.rs:40:5 | LL | 
/// <div style="hello"> | ^^^^ error: unclosed HTML tag `h3` - --> $DIR/invalid-html-tags.rs:39:7 + --> $DIR/invalid-html-tags.rs:42:7 | LL | /// <h3> | ^^^^ -error: aborting due to 8 previous errors +error: unclosed HTML tag `script` + --> $DIR/invalid-html-tags.rs:44:5 + | +LL | /// <script + | ^^^^^^ + +error: aborting due to 10 previous errors |
