Rollup merge of #78400 - GuillaumeGomez:fix-unindent, r=jyn514

Fix unindent in doc comments. Fixes #70732. r? ``@jyn514``
author: Yuki Okushi <huyuumi.dev@gmail.com> 2020-11-03 15:27:05 +0900
committer: GitHub <noreply@github.com> 2020-11-03 15:27:05 +0900
commit: e731a5a7f55b77e75573068d788ce61033b9bb62 (patch)
tree: 53ea1ea8a9bf39b304569094dbc0619e64db388a
parent: 0716724a0b29269ba3b79abb65f1f0505e5bb0ec (diff)
parent: 87f28978c532547df7e8d746ad98f863a202bab0 (diff)
download: rust-e731a5a7f55b77e75573068d788ce61033b9bb62.tar.gz
rust-e731a5a7f55b77e75573068d788ce61033b9bb62.zip
4 files changed, 163 insertions, 92 deletions
diff --git a/src/librustdoc/passes/unindent_comments.rs b/src/librustdoc/passes/unindent_comments.rs
index a9cf5a87f54..51c380f438c 100644
--- a/src/librustdoc/passes/unindent_comments.rs
+++ b/src/librustdoc/passes/unindent_comments.rs
@@ -1,7 +1,6 @@
 use std::cmp;
-use std::string::String;
 
-use crate::clean::{self, DocFragment, Item};
+use crate::clean::{self, DocFragment, DocFragmentKind, Item};
 use crate::core::DocContext;
 use crate::fold::{self, DocFolder};
 use crate::passes::Pass;
@@ -35,65 +34,81 @@ impl clean::Attributes {
 }
 
 fn unindent_fragments(docs: &mut Vec<DocFragment>) {
-    for fragment in docs {
-        fragment.doc = unindent(&fragment.doc);
-    }
-}
-
-fn unindent(s: &str) -> String {
-    let lines = s.lines().collect::<Vec<&str>>();
-    let mut saw_first_line = false;
-    let mut saw_second_line = false;
-    let min_indent = lines.iter().fold(usize::MAX, |min_indent, line| {
-        // After we see the first non-whitespace line, look at
-        // the line we have. If it is not whitespace, and therefore
-        // part of the first paragraph, then ignore the indentation
-        // level of the first line
-        let ignore_previous_indents =
-            saw_first_line && !saw_second_line && !line.chars().all(|c| c.is_whitespace());
+    // `add` is used in case the most common sugared doc syntax is used ("/// "). Lines of the
+    // other fragment kinds never start with whitespace unless they use some markdown formatting
+    // requiring it. Therefore, if the doc block has a mix of the two, non-sugared fragments are
+    // counted with one extra whitespace when computing the minimum indent, and one less
+    // whitespace is removed from them afterwards (to take this whitespace into account).
+    //
+    // For example:
+    //
+    // /// hello!
+    // #[doc = "another"]
+    //
+    // In this case, you want "hello! another" and not "hello!  another".
+    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
+        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
+    {
+        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
+        // "decide" how much the minimum indent will be.
+        1
+    } else {
+        0
+    };
 
-        let min_indent = if ignore_previous_indents { usize::MAX } else { min_indent };
+    // `min_indent` is used to know how many whitespace characters must be removed from the
+    // start of each line. Example:
+    //
+    // ///     hello!
+    // #[doc = "another"]
+    //
+    // Here, the `min_indent` is 1 (because non-sugared fragments are always counted with at
+    // least 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts
+    // with 4 (5 - 1) whitespaces.
+    let min_indent = match docs
+        .iter()
+        .map(|fragment| {
+            fragment.doc.lines().fold(usize::MAX, |min_indent, line| {
+                if line.chars().all(|c| c.is_whitespace()) {
+                    min_indent
+                } else {
+                    // Compare against either space or tab, ignoring whether they are
+                    // mixed or not.
+                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+                    cmp::min(min_indent, whitespace)
+                        + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
+                }
+            })
+        })
+        .min()
+    {
+        Some(x) => x,
+        None => return,
+    };
 
-        if saw_first_line {
-            saw_second_line = true;
+    for fragment in docs {
+        if fragment.doc.lines().count() == 0 {
+            continue;
         }
 
-        if line.chars().all(|c| c.is_whitespace()) {
-            min_indent
+        let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
+            min_indent - add
         } else {
-            saw_first_line = true;
-            let mut whitespace = 0;
-            line.chars().all(|char| {
-                // Compare against either space or tab, ignoring whether they
-                // are mixed or not
-                if char == ' ' || char == '\t' {
-                    whitespace += 1;
-                    true
+            min_indent
+        };
+
+        fragment.doc = fragment
+            .doc
+            .lines()
+            .map(|line| {
+                if line.chars().all(|c| c.is_whitespace()) {
+                    line.to_string()
                 } else {
-                    false
+                    assert!(line.len() >= min_indent);
+                    line[min_indent..].to_string()
                 }
-            });
-            cmp::min(min_indent, whitespace)
-        }
-    });
-
-    if !lines.is_empty() {
-        let mut unindented = vec![lines[0].trim_start().to_string()];
-        unindented.extend_from_slice(
-            &lines[1..]
-                .iter()
-                .map(|&line| {
-                    if line.chars().all(|c| c.is_whitespace()) {
-                        line.to_string()
-                    } else {
-                        assert!(line.len() >= min_indent);
-                        line[min_indent..].to_string()
-                    }
-                })
-                .collect::<Vec<_>>(),
-        );
-        unindented.join("\n")
-    } else {
-        s.to_string()
+            })
+            .collect::<Vec<_>>()
+            .join("\n");
     }
 }
diff --git a/src/librustdoc/passes/unindent_comments/tests.rs b/src/librustdoc/passes/unindent_comments/tests.rs
index c39c03e1249..9dec71f7683 100644
--- a/src/librustdoc/passes/unindent_comments/tests.rs
+++ b/src/librustdoc/passes/unindent_comments/tests.rs
@@ -1,72 +1,63 @@
 use super::*;
+use rustc_span::source_map::DUMMY_SP;
+
+fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
+    vec![DocFragment {
+        line: 0,
+        span: DUMMY_SP,
+        parent_module: None,
+        doc: s.to_string(),
+        kind: DocFragmentKind::SugaredDoc,
+    }]
+}
+
+#[track_caller]
+fn run_test(input: &str, expected: &str) {
+    let mut s = create_doc_fragment(input);
+    unindent_fragments(&mut s);
+    assert_eq!(s[0].doc, expected);
+}
 
 #[test]
 fn should_unindent() {
-    let s = "    line1\n    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\nline2");
+    run_test("    line1\n    line2", "line1\nline2");
 }
 
 #[test]
 fn should_unindent_multiple_paragraphs() {
-    let s = "    line1\n\n    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\n\nline2");
+    run_test("    line1\n\n    line2", "line1\n\nline2");
 }
 
 #[test]
 fn should_leave_multiple_indent_levels() {
     // Line 2 is indented another level beyond the
     // base indentation and should be preserved
-    let s = "    line1\n\n        line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\n\n    line2");
+    run_test("    line1\n\n        line2", "line1\n\n    line2");
 }
 
 #[test]
 fn should_ignore_first_line_indent() {
-    // The first line of the first paragraph may not be indented as
-    // far due to the way the doc string was written:
-    //
-    // #[doc = "Start way over here
-    //          and continue here"]
-    let s = "line1\n    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\nline2");
+    run_test("line1\n    line2", "line1\n    line2");
 }
 
 #[test]
 fn should_not_ignore_first_line_indent_in_a_single_line_para() {
-    let s = "line1\n\n    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\n\n    line2");
+    run_test("line1\n\n    line2", "line1\n\n    line2");
 }
 
 #[test]
 fn should_unindent_tabs() {
-    let s = "\tline1\n\tline2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\nline2");
+    run_test("\tline1\n\tline2", "line1\nline2");
 }
 
 #[test]
 fn should_trim_mixed_indentation() {
-    let s = "\t    line1\n\t    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\nline2");
-
-    let s = "    \tline1\n    \tline2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1\nline2");
+    run_test("\t    line1\n\t    line2", "line1\nline2");
+    run_test("    \tline1\n    \tline2", "line1\nline2");
 }
 
 #[test]
 fn should_not_trim() {
-    let s = "\t    line1  \n\t    line2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1  \nline2");
-
-    let s = "    \tline1  \n    \tline2".to_string();
-    let r = unindent(&s);
-    assert_eq!(r, "line1  \nline2");
+    run_test("\t    line1  \n\t    line2", "line1  \nline2");
+    run_test("    \tline1  \n    \tline2", "line1  \nline2");
 }
diff --git a/src/test/rustdoc/unindent.md b/src/test/rustdoc/unindent.md
new file mode 100644
index 00000000000..8e4e7a25af8
--- /dev/null
+++ b/src/test/rustdoc/unindent.md
@@ -0,0 +1 @@
+Just some text.
diff --git a/src/test/rustdoc/unindent.rs b/src/test/rustdoc/unindent.rs
new file mode 100644
index 00000000000..d10e1ec89c5
--- /dev/null
+++ b/src/test/rustdoc/unindent.rs
@@ -0,0 +1,64 @@
+#![feature(external_doc)]
+
+#![crate_name = "foo"]
+
+// @has foo/struct.Example.html
+// @matches - '//pre[@class="rust rust-example-rendered"]' \
+//     '(?m)let example = Example::new\(\)\n    \.first\(\)\n    \.second\(\)\n    \.build\(\);\Z'
+/// ```rust
+/// let example = Example::new()
+///     .first()
+#[cfg_attr(not(feature = "one"), doc = "    .second()")]
+///     .build();
+/// ```
+pub struct Example;
+
+// @has foo/struct.F.html
+// @matches - '//pre[@class="rust rust-example-rendered"]' \
+//     '(?m)let example = Example::new\(\)\n    \.first\(\)\n    \.another\(\)\n    \.build\(\);\Z'
+///```rust
+///let example = Example::new()
+///    .first()
+#[cfg_attr(not(feature = "one"), doc = "    .another()")]
+///    .build();
+/// ```
+pub struct F;
+
+// @has foo/struct.G.html
+// @matches - '//pre[@class="rust rust-example-rendered"]' \
+//     '(?m)let example = Example::new\(\)\n\.first\(\)\n    \.another\(\)\n\.build\(\);\Z'
+///```rust
+///let example = Example::new()
+///.first()
+#[cfg_attr(not(feature = "one"), doc = "    .another()")]
+///.build();
+///```
+pub struct G;
+
+// @has foo/struct.H.html
+// @has - '//div[@class="docblock"]/p' 'no whitespace lol'
+///no whitespace
+#[doc = " lol"]
+pub struct H;
+
+// @has foo/struct.I.html
+// @matches - '//pre[@class="rust rust-example-rendered"]' '(?m)4 whitespaces!\Z'
+///     4 whitespaces!
+#[doc = "something"]
+pub struct I;
+
+// @has foo/struct.J.html
+// @matches - '//div[@class="docblock"]/p' '(?m)a\nno whitespace\nJust some text.\Z'
+///a
+///no whitespace
+#[doc(include = "unindent.md")]
+pub struct J;
+
+// @has foo/struct.K.html
+// @matches - '//pre[@class="rust rust-example-rendered"]' '(?m)4 whitespaces!\Z'
+///a
+///
+///    4 whitespaces!
+///
+#[doc(include = "unindent.md")]
+pub struct K;
author	Yuki Okushi <huyuumi.dev@gmail.com>	2020-11-03 15:27:05 +0900
committer	GitHub <noreply@github.com>	2020-11-03 15:27:05 +0900
commit	e731a5a7f55b77e75573068d788ce61033b9bb62 (patch)
tree	53ea1ea8a9bf39b304569094dbc0619e64db388a
parent	0716724a0b29269ba3b79abb65f1f0505e5bb0ec (diff)
parent	87f28978c532547df7e8d746ad98f863a202bab0 (diff)
download	rust-e731a5a7f55b77e75573068d788ce61033b9bb62.tar.gz rust-e731a5a7f55b77e75573068d788ce61033b9bb62.zip