about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2024-06-29 11:24:36 -0700
committerMichael Howell <michael@notriddle.com>2024-07-01 07:21:02 -0700
commit294c3dda881ae65d528ee0380b7628deaf33ae96 (patch)
tree66c8624903b2f8d35ae783635e5bc6642f168907
parent15fbe618a14ddde520561c4cf1b85d4e4c9005f8 (diff)
downloadrust-294c3dda881ae65d528ee0380b7628deaf33ae96.tar.gz
rust-294c3dda881ae65d528ee0380b7628deaf33ae96.zip
rustdoc: add usable lint for pulldown-cmark-0.11 parsing changes
-rw-r--r--Cargo.lock12
-rw-r--r--src/librustdoc/Cargo.toml1
-rw-r--r--src/librustdoc/lint.rs9
-rw-r--r--src/librustdoc/passes/lint.rs2
-rw-r--r--src/librustdoc/passes/lint/unportable_markdown.rs152
-rw-r--r--tests/rustdoc-ui/unportable-markdown.rs63
-rw-r--r--tests/rustdoc-ui/unportable-markdown.stderr39
7 files changed, 278 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3af90a252ae..96cef907084 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3123,6 +3123,17 @@ dependencies = [
 
 [[package]]
 name = "pulldown-cmark"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
+dependencies = [
+ "bitflags 2.5.0",
+ "memchr",
+ "unicase",
+]
+
+[[package]]
+name = "pulldown-cmark"
 version = "0.10.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76979bea66e7875e7509c4ec5300112b316af87fa7a252ca91c448b32dfe3993"
@@ -4890,6 +4901,7 @@ dependencies = [
  "indexmap",
  "itertools",
  "minifier",
+ "pulldown-cmark 0.9.6",
  "regex",
  "rustdoc-json-types",
  "serde",
diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml
index 31222f213d8..51fb126cb34 100644
--- a/src/librustdoc/Cargo.toml
+++ b/src/librustdoc/Cargo.toml
@@ -13,6 +13,7 @@ base64 = "0.21.7"
 itertools = "0.12"
 indexmap = "2"
 minifier = "0.3.0"
+pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
 regex = "1"
 rustdoc-json-types = { path = "../rustdoc-json-types" }
 serde_json = "1.0"
diff --git a/src/librustdoc/lint.rs b/src/librustdoc/lint.rs
index dd2bb47e592..8eaca70eaff 100644
--- a/src/librustdoc/lint.rs
+++ b/src/librustdoc/lint.rs
@@ -196,6 +196,14 @@ declare_rustdoc_lint! {
     "detects redundant explicit links in doc comments"
 }
 
+declare_rustdoc_lint! {
+    /// This compatibility lint checks for Markdown syntax that works in the old engine but not
+    /// the new one.
+    UNPORTABLE_MARKDOWN,
+    Warn,
+    "detects markdown that is interpreted differently in different parser"
+}
+
 pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
     vec![
         BROKEN_INTRA_DOC_LINKS,
@@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
         MISSING_CRATE_LEVEL_DOCS,
         UNESCAPED_BACKTICKS,
         REDUNDANT_EXPLICIT_LINKS,
+        UNPORTABLE_MARKDOWN,
     ]
 });
 
diff --git a/src/librustdoc/passes/lint.rs b/src/librustdoc/passes/lint.rs
index c6d5b7bd346..bc804a340bf 100644
--- a/src/librustdoc/passes/lint.rs
+++ b/src/librustdoc/passes/lint.rs
@@ -6,6 +6,7 @@ mod check_code_block_syntax;
 mod html_tags;
 mod redundant_explicit_links;
 mod unescaped_backticks;
+mod unportable_markdown;
 
 use super::Pass;
 use crate::clean::*;
@@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> {
         html_tags::visit_item(self.cx, item);
         unescaped_backticks::visit_item(self.cx, item);
         redundant_explicit_links::visit_item(self.cx, item);
+        unportable_markdown::visit_item(self.cx, item);
 
         self.visit_item_recur(item)
     }
diff --git a/src/librustdoc/passes/lint/unportable_markdown.rs b/src/librustdoc/passes/lint/unportable_markdown.rs
new file mode 100644
index 00000000000..5f185377634
--- /dev/null
+++ b/src/librustdoc/passes/lint/unportable_markdown.rs
@@ -0,0 +1,152 @@
+//! Detects specific markdown syntax that's different between pulldown-cmark
+//! 0.9 and 0.11.
+//!
+//! This is a mitigation for old parser bugs that affected some
+//! real crates' docs. The old parser claimed to comply with CommonMark,
+//! but it did not. These warnings will eventually be removed,
+//! though some of them may become Clippy lints.
+//!
+//! <https://github.com/rust-lang/rust/pull/121659#issuecomment-1992752820>
+//!
+//! <https://rustc-dev-guide.rust-lang.org/bug-fix-procedure.html#add-the-lint-to-the-list-of-removed-lists>
+
+use crate::clean::Item;
+use crate::core::DocContext;
+use pulldown_cmark as cmarkn;
+use pulldown_cmark_old as cmarko;
+use rustc_lint_defs::Applicability;
+use rustc_resolve::rustdoc::source_span_for_markdown_range;
+use std::collections::{BTreeMap, BTreeSet};
+
+pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
+    let tcx = cx.tcx;
+    let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
+        // If non-local, no need to check anything.
+        return;
+    };
+
+    let dox = item.doc_value();
+    if dox.is_empty() {
+        return;
+    }
+
+    // P1: unintended strikethrough was fixed by requiring single-tildes to flank
+    // the same way underscores do, so nothing is done here
+
+    // P2: block quotes without following space parsed wrong
+    //
+    // This is the set of starting points for block quotes with no space after
+    // the `>`. It is populated by the new parser, and if the old parser fails to
+    // clear it out, it'll produce a warning.
+    let mut spaceless_block_quotes = BTreeSet::new();
+
+    // P3: missing footnote references
+    //
+    // This is populated by listening for FootnoteReference from
+    // the new parser and old parser.
+    let mut missing_footnote_references = BTreeMap::new();
+    let mut found_footnote_references = BTreeSet::new();
+
+    // populate problem cases from new parser
+    {
+        pub fn main_body_opts_new() -> cmarkn::Options {
+            cmarkn::Options::ENABLE_TABLES
+                | cmarkn::Options::ENABLE_FOOTNOTES
+                | cmarkn::Options::ENABLE_STRIKETHROUGH
+                | cmarkn::Options::ENABLE_TASKLISTS
+                | cmarkn::Options::ENABLE_SMART_PUNCTUATION
+        }
+        let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter();
+        while let Some((event, span)) = parser_new.next() {
+            if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event {
+                if !dox[span.clone()].starts_with("> ") {
+                    spaceless_block_quotes.insert(span.start);
+                }
+            }
+            if let cmarkn::Event::FootnoteReference(_) = event {
+                found_footnote_references.insert(span.start + 1);
+            }
+        }
+    }
+
+    // remove cases where they don't actually differ
+    {
+        pub fn main_body_opts_old() -> cmarko::Options {
+            cmarko::Options::ENABLE_TABLES
+                | cmarko::Options::ENABLE_FOOTNOTES
+                | cmarko::Options::ENABLE_STRIKETHROUGH
+                | cmarko::Options::ENABLE_TASKLISTS
+                | cmarko::Options::ENABLE_SMART_PUNCTUATION
+        }
+        let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter();
+        while let Some((event, span)) = parser_old.next() {
+            if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event {
+                if !dox[span.clone()].starts_with("> ") {
+                    spaceless_block_quotes.remove(&span.start);
+                }
+            }
+            if let cmarko::Event::FootnoteReference(_) = event {
+                if !found_footnote_references.contains(&(span.start + 1)) {
+                    missing_footnote_references.insert(span.start + 1, span);
+                }
+            }
+        }
+    }
+
+    for start in spaceless_block_quotes {
+        let (span, precise) =
+            source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings)
+                .map(|span| (span, true))
+                .unwrap_or_else(|| (item.attr_span(tcx), false));
+
+        tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| {
+            lint.primary_message("unportable markdown");
+            lint.help(format!("confusing block quote with no space after the `>` marker"));
+            if precise {
+                lint.span_suggestion(
+                    span.shrink_to_hi(),
+                    "if the quote is intended, add a space",
+                    " ",
+                    Applicability::MaybeIncorrect,
+                );
+                lint.span_suggestion(
+                    span.shrink_to_lo(),
+                    "if it should not be a quote, escape it",
+                    "\\",
+                    Applicability::MaybeIncorrect,
+                );
+            }
+        });
+    }
+    for (_caret, span) in missing_footnote_references {
+        let (ref_span, precise) =
+            source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings)
+                .map(|span| (span, true))
+                .unwrap_or_else(|| (item.attr_span(tcx), false));
+
+        tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| {
+            lint.primary_message("unportable markdown");
+            if precise {
+                lint.span_suggestion(
+                    ref_span.shrink_to_lo(),
+                    "if it should not be a footnote, escape it",
+                    "\\",
+                    Applicability::MaybeIncorrect,
+                );
+            }
+            if dox.as_bytes().get(span.end) == Some(&b'[') {
+                lint.help("confusing footnote reference and link");
+                if precise {
+                    lint.span_suggestion(
+                        ref_span.shrink_to_hi(),
+                        "if the footnote is intended, add a space",
+                        " ",
+                        Applicability::MaybeIncorrect,
+                    );
+                } else {
+                    lint.help("there should be a space between the link and the footnote");
+                }
+            }
+        });
+    }
+}
diff --git a/tests/rustdoc-ui/unportable-markdown.rs b/tests/rustdoc-ui/unportable-markdown.rs
new file mode 100644
index 00000000000..8035e680f3c
--- /dev/null
+++ b/tests/rustdoc-ui/unportable-markdown.rs
@@ -0,0 +1,63 @@
+// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
+//
+// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes.
+//
+// This version of the lint is targeted at two especially-common cases where docs got broken.
+// Other differences in parsing should not warn.
+#![allow(rustdoc::broken_intra_doc_links)]
+#![deny(rustdoc::unportable_markdown)]
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
+///
+/// Test footnote [^foot].
+///
+/// [^foot]: This is nested within the footnote now, but didn't used to be.
+///
+///     This is a multi-paragraph footnote.
+pub struct GfmFootnotes;
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/773>
+///
+/// test [^foo][^bar]
+//~^ ERROR unportable markdown
+///
+/// [^foo]: test
+/// [^bar]: test2
+pub struct FootnoteSmashedName;
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
+///
+/// - _t
+///   # test
+///   t_
+pub struct NestingCornerCase;
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
+///
+/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
+pub struct Emphasis1;
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
+///
+/// |
+/// |
+pub struct NotEnoughTable;
+
+/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/675>
+///
+/// foo
+/// >bar
+//~^ ERROR unportable markdown
+pub struct BlockQuoteNoSpace;
+
+/// Negative test.
+///
+/// foo
+/// > bar
+pub struct BlockQuoteSpace;
+
+/// Negative test.
+///
+/// >bar
+/// baz
+pub struct BlockQuoteNoSpaceStart;
diff --git a/tests/rustdoc-ui/unportable-markdown.stderr b/tests/rustdoc-ui/unportable-markdown.stderr
new file mode 100644
index 00000000000..b524aca25ae
--- /dev/null
+++ b/tests/rustdoc-ui/unportable-markdown.stderr
@@ -0,0 +1,39 @@
+error: unportable markdown
+  --> $DIR/unportable-markdown.rs:21:10
+   |
+LL | /// test [^foo][^bar]
+   |          ^^^^^^
+   |
+   = help: confusing footnote reference and link
+note: the lint level is defined here
+  --> $DIR/unportable-markdown.rs:8:9
+   |
+LL | #![deny(rustdoc::unportable_markdown)]
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+help: if it should not be a footnote, escape it
+   |
+LL | /// test \[^foo][^bar]
+   |          +
+help: if the footnote is intended, add a space
+   |
+LL | /// test [^foo] [^bar]
+   |                +
+
+error: unportable markdown
+  --> $DIR/unportable-markdown.rs:49:5
+   |
+LL | /// >bar
+   |     ^
+   |
+   = help: confusing block quote with no space after the `>` marker
+help: if the quote is intended, add a space
+   |
+LL | /// > bar
+   |      +
+help: if it should not be a quote, escape it
+   |
+LL | /// \>bar
+   |     +
+
+error: aborting due to 2 previous errors
+