about summary refs log tree commit diff
diff options
context:
space:
mode:
author Guillaume Gomez <guillaume1.gomez@gmail.com> 2020-09-28 11:40:31 +0200
committer Guillaume Gomez <guillaume1.gomez@gmail.com> 2020-10-03 14:16:24 +0200
commit 5bc148957efc0bf4640d3d6b3e1824c00f1156e8 (patch)
tree b2b87b4e49c288f1bee17f3bdf08ef743e135746
parent 30cabfd215f374b855f6d6473b00a55c3511c3e2 (diff)
download rust-5bc148957efc0bf4640d3d6b3e1824c00f1156e8.tar.gz
rust-5bc148957efc0bf4640d3d6b3e1824c00f1156e8.zip
Correctly handle unicode characters and tags being open just before the end of the doc comment
-rw-r--r-- src/librustdoc/passes/html_tags.rs 58
-rw-r--r-- src/test/rustdoc-ui/invalid-html-tags.rs 7
-rw-r--r-- src/test/rustdoc-ui/invalid-html-tags.stderr 36
3 files changed, 72 insertions, 29 deletions
diff --git a/src/librustdoc/passes/html_tags.rs b/src/librustdoc/passes/html_tags.rs
index 490e913fbde..f8869a41eb6 100644
--- a/src/librustdoc/passes/html_tags.rs
+++ b/src/librustdoc/passes/html_tags.rs
@@ -45,14 +45,22 @@ fn drop_tag(
     range: Range<usize>,
     f: &impl Fn(&str, &Range<usize>),
 ) {
-    if let Some(pos) = tags.iter().rev().position(|(t, _)| *t == tag_name) {
+    let tag_name_low = tag_name.to_lowercase();
+    if let Some(pos) = tags.iter().rev().position(|(t, _)| t.to_lowercase() == tag_name_low) {
         // Because this is from a `rev` iterator, the position is reversed as well!
         let pos = tags.len() - 1 - pos;
-        // If the tag is nested inside a "<script>", not warning should be emitted.
-        let should_not_warn =
-            tags.iter().take(pos + 1).any(|(at, _)| at == "script" || at == "style");
+        // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
+        // be emitted.
+        let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
+            let at = at.to_lowercase();
+            at == "script" || at == "style"
+        });
         for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
-            if should_not_warn || ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name) {
+            if should_not_warn {
+                continue;
+            }
+            let last_tag_name_low = last_tag_name.to_lowercase();
+            if ALLOWED_UNCLOSED.iter().any(|&at| at == &last_tag_name_low) {
                 continue;
             }
             // `tags` is used as a queue, meaning that everything after `pos` is included inside it.
@@ -77,21 +85,29 @@ fn extract_tag(
 ) {
     let mut iter = text.chars().enumerate().peekable();
 
-    while let Some((start_pos, c)) = iter.next() {
+    'top: while let Some((start_pos, c)) = iter.next() {
         if c == '<' {
             let mut tag_name = String::new();
             let mut is_closing = false;
-            while let Some((pos, c)) = iter.peek() {
+            let mut prev_pos = start_pos;
+            loop {
+                let (pos, c) = match iter.peek() {
+                    Some((pos, c)) => (*pos, *c),
+                    // In case we reached the end of the doc comment, we want to check that it's an
+                    // unclosed HTML tag. For example "/// <h3".
+                    None => (prev_pos, '\0'),
+                };
+                prev_pos = pos;
                 // Checking if this is a closing tag (like `</a>` for `<a>`).
-                if *c == '/' && tag_name.is_empty() {
+                if c == '/' && tag_name.is_empty() {
                     is_closing = true;
-                } else if c.is_ascii_alphanumeric() && !c.is_ascii_uppercase() {
-                    tag_name.push(*c);
+                } else if c.is_ascii_alphanumeric() {
+                    tag_name.push(c);
                 } else {
                     if !tag_name.is_empty() {
                         let mut r =
                             Range { start: range.start + start_pos, end: range.start + pos };
-                        if *c == '>' {
+                        if c == '>' {
                             // In case we have a tag without attribute, we can consider the span to
                             // refer to it fully.
                             r.end += 1;
@@ -102,11 +118,20 @@ fn extract_tag(
                             tags.push((tag_name, r));
                         }
                     }
-                    break;
+                    continue 'top;
+                }
+                // Some chars like 💩 are longer than 1 byte, so we need to skip the other
+                // bytes as well to prevent stopping "in the middle" of a char.
+                for _ in 0..c.len_utf8() {
+                    iter.next();
                 }
-                iter.next();
             }
         }
+        // Some chars like 💩 are longer than 1 byte, so we need to skip the other
+        // bytes as well to prevent stopping "in the middle" of a char.
+        for _ in 0..c.len_utf8() - 1 {
+            iter.next();
+        }
     }
 }
 
@@ -143,9 +168,10 @@ impl<'a, 'tcx> DocFolder for InvalidHtmlTagsLinter<'a, 'tcx> {
                 }
             }
 
-            for (tag, range) in
-                tags.iter().filter(|(t, _)| ALLOWED_UNCLOSED.iter().find(|&at| at == t).is_none())
-            {
+            for (tag, range) in tags.iter().filter(|(t, _)| {
+                let t = t.to_lowercase();
+                ALLOWED_UNCLOSED.iter().find(|&&at| at == t).is_none()
+            }) {
                 report_diag(&format!("unclosed HTML tag `{}`", tag), range);
             }
         }
diff --git a/src/test/rustdoc-ui/invalid-html-tags.rs b/src/test/rustdoc-ui/invalid-html-tags.rs
index 0dc2002bd39..a318b8ceca2 100644
--- a/src/test/rustdoc-ui/invalid-html-tags.rs
+++ b/src/test/rustdoc-ui/invalid-html-tags.rs
@@ -1,5 +1,8 @@
 #![deny(invalid_html_tags)]
 
+//! <p>💩<p>
+//~^ ERROR unclosed HTML tag `p`
+
 /// <img><input>
 /// <script>
 /// <img><input>
@@ -38,6 +41,8 @@ pub fn b() {}
 //~^ ERROR unclosed HTML tag `div`
 ///   <h3>
 //~^ ERROR unclosed HTML tag `h3`
+/// <script
+//~^ ERROR unclosed HTML tag `script`
 pub fn c() {}
 
 // Unclosed tags shouldn't warn if they are nested inside a <script> elem.
@@ -55,7 +60,7 @@ pub fn d() {}
 /// <style>
 ///   <h3><div>
 /// </style>
-/// <style>
+/// <stYle>
 ///   <div>
 ///     <p>
 ///   </div>
diff --git a/src/test/rustdoc-ui/invalid-html-tags.stderr b/src/test/rustdoc-ui/invalid-html-tags.stderr
index d2e9704a227..70d577bbfa9 100644
--- a/src/test/rustdoc-ui/invalid-html-tags.stderr
+++ b/src/test/rustdoc-ui/invalid-html-tags.stderr
@@ -1,8 +1,8 @@
-error: unclosed HTML tag `unknown`
-  --> $DIR/invalid-html-tags.rs:7:5
+error: unclosed HTML tag `p`
+  --> $DIR/invalid-html-tags.rs:3:5
    |
-LL | /// <unknown>
-   |     ^^^^^^^^^
+LL | //! <p>💩<p>
+   |     ^^^
    |
 note: the lint level is defined here
   --> $DIR/invalid-html-tags.rs:1:9
@@ -10,47 +10,59 @@ note: the lint level is defined here
 LL | #![deny(invalid_html_tags)]
    |         ^^^^^^^^^^^^^^^^^
 
-error: unclosed HTML tag `script`
+error: unclosed HTML tag `unknown`
   --> $DIR/invalid-html-tags.rs:10:5
    |
+LL | /// <unknown>
+   |     ^^^^^^^^^
+
+error: unclosed HTML tag `script`
+  --> $DIR/invalid-html-tags.rs:13:5
+   |
 LL | /// <script>
    |     ^^^^^^^^
 
 error: unclosed HTML tag `h2`
-  --> $DIR/invalid-html-tags.rs:15:7
+  --> $DIR/invalid-html-tags.rs:18:7
    |
 LL | ///   <h2>
    |       ^^^^
 
 error: unclosed HTML tag `h3`
-  --> $DIR/invalid-html-tags.rs:17:9
+  --> $DIR/invalid-html-tags.rs:20:9
    |
 LL | ///     <h3>
    |         ^^^^
 
 error: unopened HTML tag `hello`
-  --> $DIR/invalid-html-tags.rs:20:5
+  --> $DIR/invalid-html-tags.rs:23:5
    |
 LL | /// </hello>
    |     ^^^^^^^^
 
 error: unclosed HTML tag `p`
-  --> $DIR/invalid-html-tags.rs:25:14
+  --> $DIR/invalid-html-tags.rs:28:14
    |
 LL | ///    <br/> <p>
    |              ^^^
 
 error: unclosed HTML tag `div`
-  --> $DIR/invalid-html-tags.rs:37:5
+  --> $DIR/invalid-html-tags.rs:40:5
    |
 LL | /// <div style="hello">
    |     ^^^^
 
 error: unclosed HTML tag `h3`
-  --> $DIR/invalid-html-tags.rs:39:7
+  --> $DIR/invalid-html-tags.rs:42:7
    |
 LL | ///   <h3>
    |       ^^^^
 
-error: aborting due to 8 previous errors
+error: unclosed HTML tag `script`
+  --> $DIR/invalid-html-tags.rs:44:5
+   |
+LL | /// <script
+   |     ^^^^^^
+
+error: aborting due to 10 previous errors