about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-01-02 15:31:53 +0000
committerbors <bors@rust-lang.org>2021-01-02 15:31:53 +0000
commitfd85ca02f6da4531bcec65b8309ecf38e0eb8938 (patch)
tree1be9b90cb754d2a6dc5623050918ec92d7c67c44
parent90ccf4f5adfb2562fc95c996b97faac7775a34bb (diff)
parentf07354333f254fc7d3f20c939b29ba93143b8671 (diff)
downloadrust-fd85ca02f6da4531bcec65b8309ecf38e0eb8938.tar.gz
rust-fd85ca02f6da4531bcec65b8309ecf38e0eb8938.zip
Auto merge of #80550 - bugadani:markdown-refactor, r=jyn514
Cleanup markdown span handling, take 2

This PR includes the cleanups made in #80244 except for the removal of `locate()`.

While the biggest conceptual part in #80244 was the removal of `locate()`, it introduced a diagnostic regression.

Additional cleanup:
 - Use `RefCell` to avoid building two separate vectors for the links

Work to do:
- [ ] Decide if `locate()` can be simplified by assuming `s` is always in `md`
- [ ] Should probably add some tests that still provide the undesired diagnostics causing #80381

cc `@jyn514` This is the best I can do without patching Pulldown to provide multiple ranges for reference-style links. Also, since `locate` is probably more efficient than `rfind` (at least it's constant time), I decided to not check the link type and just cover every &str as it was before.
-rw-r--r--src/librustdoc/html/markdown.rs155
-rw-r--r--src/librustdoc/passes/collect_intra_doc_links.rs59
2 files changed, 111 insertions, 103 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index e519c6ee983..07744139605 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -25,6 +25,7 @@ use rustc_session::lint;
 use rustc_span::edition::Edition;
 use rustc_span::Span;
 use std::borrow::Cow;
+use std::cell::RefCell;
 use std::collections::VecDeque;
 use std::default::Default;
 use std::fmt::Write;
@@ -414,11 +415,13 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
     }
 }
 
+type SpannedEvent<'a> = (Event<'a>, Range<usize>);
+
 /// Make headings links with anchor IDs and build up TOC.
 struct HeadingLinks<'a, 'b, 'ids, I> {
     inner: I,
     toc: Option<&'b mut TocBuilder>,
-    buf: VecDeque<Event<'a>>,
+    buf: VecDeque<SpannedEvent<'a>>,
     id_map: &'ids mut IdMap,
 }
 
@@ -428,8 +431,10 @@ impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
     }
 }
 
-impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> {
-    type Item = Event<'a>;
+impl<'a, 'b, 'ids, I: Iterator<Item = SpannedEvent<'a>>> Iterator
+    for HeadingLinks<'a, 'b, 'ids, I>
+{
+    type Item = SpannedEvent<'a>;
 
     fn next(&mut self) -> Option<Self::Item> {
         if let Some(e) = self.buf.pop_front() {
@@ -437,31 +442,29 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
         }
 
         let event = self.inner.next();
-        if let Some(Event::Start(Tag::Heading(level))) = event {
+        if let Some((Event::Start(Tag::Heading(level)), _)) = event {
             let mut id = String::new();
             for event in &mut self.inner {
-                match &event {
+                match &event.0 {
                     Event::End(Tag::Heading(..)) => break,
+                    Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
                     Event::Text(text) | Event::Code(text) => {
                         id.extend(text.chars().filter_map(slugify));
+                        self.buf.push_back(event);
                     }
-                    _ => {}
-                }
-                match event {
-                    Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
-                    event => self.buf.push_back(event),
+                    _ => self.buf.push_back(event),
                 }
             }
             let id = self.id_map.derive(id);
 
             if let Some(ref mut builder) = self.toc {
                 let mut html_header = String::new();
-                html::push_html(&mut html_header, self.buf.iter().cloned());
+                html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
                 let sec = builder.push(level as u32, html_header, id.clone());
-                self.buf.push_front(Event::Html(format!("{} ", sec).into()));
+                self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
             }
 
-            self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));
+            self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
 
             let start_tags = format!(
                 "<h{level} id=\"{id}\" class=\"section-header\">\
@@ -469,7 +472,7 @@ impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a,
                 id = id,
                 level = level
             );
-            return Some(Event::Html(start_tags.into()));
+            return Some((Event::Html(start_tags.into()), 0..0));
         }
         event
     }
@@ -555,23 +558,23 @@ impl<'a, I> Footnotes<'a, I> {
     }
 }
 
-impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
-    type Item = Event<'a>;
+impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for Footnotes<'a, I> {
+    type Item = SpannedEvent<'a>;
 
     fn next(&mut self) -> Option<Self::Item> {
         loop {
             match self.inner.next() {
-                Some(Event::FootnoteReference(ref reference)) => {
+                Some((Event::FootnoteReference(ref reference), range)) => {
                     let entry = self.get_entry(&reference);
                     let reference = format!(
                         "<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
                         (*entry).1
                     );
-                    return Some(Event::Html(reference.into()));
+                    return Some((Event::Html(reference.into()), range));
                 }
-                Some(Event::Start(Tag::FootnoteDefinition(def))) => {
+                Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
                     let mut content = Vec::new();
-                    for event in &mut self.inner {
+                    for (event, _) in &mut self.inner {
                         if let Event::End(Tag::FootnoteDefinition(..)) = event {
                             break;
                         }
@@ -602,7 +605,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
                             ret.push_str("</li>");
                         }
                         ret.push_str("</ol></div>");
-                        return Some(Event::Html(ret.into()));
+                        return Some((Event::Html(ret.into()), 0..0));
                     } else {
                         return None;
                     }
@@ -912,13 +915,14 @@ impl Markdown<'_> {
         };
 
         let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
+        let p = p.into_offset_iter();
 
         let mut s = String::with_capacity(md.len() * 3 / 2);
 
         let p = HeadingLinks::new(p, None, &mut ids);
-        let p = LinkReplacer::new(p, links);
-        let p = CodeBlocks::new(p, codes, edition, playground);
         let p = Footnotes::new(p);
+        let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
+        let p = CodeBlocks::new(p, codes, edition, playground);
         html::push_html(&mut s, p);
 
         s
@@ -929,7 +933,7 @@ impl MarkdownWithToc<'_> {
     crate fn into_string(self) -> String {
         let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
 
-        let p = Parser::new_ext(md, opts());
+        let p = Parser::new_ext(md, opts()).into_offset_iter();
 
         let mut s = String::with_capacity(md.len() * 3 / 2);
 
@@ -937,8 +941,8 @@ impl MarkdownWithToc<'_> {
 
         {
             let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
-            let p = CodeBlocks::new(p, codes, edition, playground);
             let p = Footnotes::new(p);
+            let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
             html::push_html(&mut s, p);
         }
 
@@ -954,19 +958,19 @@ impl MarkdownHtml<'_> {
         if md.is_empty() {
             return String::new();
         }
-        let p = Parser::new_ext(md, opts());
+        let p = Parser::new_ext(md, opts()).into_offset_iter();
 
         // Treat inline HTML as plain text.
-        let p = p.map(|event| match event {
-            Event::Html(text) => Event::Text(text),
+        let p = p.map(|event| match event.0 {
+            Event::Html(text) => (Event::Text(text), event.1),
             _ => event,
         });
 
         let mut s = String::with_capacity(md.len() * 3 / 2);
 
         let p = HeadingLinks::new(p, None, &mut ids);
-        let p = CodeBlocks::new(p, codes, edition, playground);
         let p = Footnotes::new(p);
+        let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
         html::push_html(&mut s, p);
 
         s
@@ -1119,56 +1123,65 @@ crate fn plain_text_summary(md: &str) -> String {
     s
 }
 
-crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
+crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
     if md.is_empty() {
         return vec![];
     }
 
-    let mut links = vec![];
-    let mut shortcut_links = vec![];
-
-    {
-        let locate = |s: &str| unsafe {
-            let s_start = s.as_ptr();
-            let s_end = s_start.add(s.len());
-            let md_start = md.as_ptr();
-            let md_end = md_start.add(md.len());
-            if md_start <= s_start && s_end <= md_end {
-                let start = s_start.offset_from(md_start) as usize;
-                let end = s_end.offset_from(md_start) as usize;
-                Some(start..end)
-            } else {
-                None
-            }
-        };
+    let links = RefCell::new(vec![]);
+
+    // FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
+    let locate = |s: &str, fallback: Range<usize>| unsafe {
+        let s_start = s.as_ptr();
+        let s_end = s_start.add(s.len());
+        let md_start = md.as_ptr();
+        let md_end = md_start.add(md.len());
+        if md_start <= s_start && s_end <= md_end {
+            let start = s_start.offset_from(md_start) as usize;
+            let end = s_end.offset_from(md_start) as usize;
+            start..end
+        } else {
+            fallback
+        }
+    };
+
+    let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
+        // For diagnostics, we want to underline the link's definition but `span` will point at
+        // where the link is used. This is a problem for reference-style links, where the definition
+        // is separate from the usage.
+        match link {
+            // `Borrowed` variant means the string (the link's destination) may come directly from
+            // the markdown text and we can locate the original link destination.
+            // NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
+            // so `locate()` can fall back to use `span`.
+            CowStr::Borrowed(s) => locate(s, span),
+
+            // For anything else, we can only use the provided range.
+            CowStr::Boxed(_) | CowStr::Inlined(_) => span,
+        }
+    };
 
-        let mut push = |link: BrokenLink<'_>| {
-            // FIXME: use `link.span` instead of `locate`
-            // (doing it now includes the `[]` as well as the text)
-            shortcut_links.push((link.reference.to_owned(), locate(link.reference)));
-            None
-        };
-        let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
-
-        // There's no need to thread an IdMap through to here because
-        // the IDs generated aren't going to be emitted anywhere.
-        let mut ids = IdMap::new();
-        let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
-
-        for ev in iter {
-            if let Event::Start(Tag::Link(_, dest, _)) = ev {
-                debug!("found link: {}", dest);
-                links.push(match dest {
-                    CowStr::Borrowed(s) => (s.to_owned(), locate(s)),
-                    s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None),
-                });
-            }
+    let mut push = |link: BrokenLink<'_>| {
+        let span = span_for_link(&CowStr::Borrowed(link.reference), link.span);
+        links.borrow_mut().push((link.reference.to_owned(), span));
+        None
+    };
+    let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push)).into_offset_iter();
+
+    // There's no need to thread an IdMap through to here because
+    // the IDs generated aren't going to be emitted anywhere.
+    let mut ids = IdMap::new();
+    let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
+
+    for ev in iter {
+        if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
+            debug!("found link: {}", dest);
+            let span = span_for_link(&dest, ev.1);
+            links.borrow_mut().push((dest.into_string(), span));
         }
     }
 
-    links.append(&mut shortcut_links);
-
-    links
+    links.into_inner()
 }
 
 #[derive(Debug)]
diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs
index 1719b35a362..bd8bfd8e1c9 100644
--- a/src/librustdoc/passes/collect_intra_doc_links.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links.rs
@@ -245,7 +245,7 @@ struct DiagnosticInfo<'a> {
     item: &'a Item,
     dox: &'a str,
     ori_link: &'a str,
-    link_range: Option<Range<usize>>,
+    link_range: Range<usize>,
 }
 
 #[derive(Clone, Debug, Hash)]
@@ -960,7 +960,7 @@ impl LinkCollector<'_, '_> {
         parent_node: Option<DefId>,
         krate: CrateNum,
         ori_link: String,
-        link_range: Option<Range<usize>>,
+        link_range: Range<usize>,
     ) -> Option<ItemLink> {
         trace!("considering link '{}'", ori_link);
 
@@ -1606,7 +1606,7 @@ fn report_diagnostic(
     msg: &str,
     item: &Item,
     dox: &str,
-    link_range: &Option<Range<usize>>,
+    link_range: &Range<usize>,
     decorate: impl FnOnce(&mut DiagnosticBuilder<'_>, Option<rustc_span::Span>),
 ) {
     let hir_id = match cx.as_local_hir_id(item.def_id) {
@@ -1624,31 +1624,27 @@ fn report_diagnostic(
     cx.tcx.struct_span_lint_hir(lint, hir_id, sp, |lint| {
         let mut diag = lint.build(msg);
 
-        let span = link_range
-            .as_ref()
-            .and_then(|range| super::source_span_for_markdown_range(cx, dox, range, attrs));
+        let span = super::source_span_for_markdown_range(cx, dox, link_range, attrs);
 
-        if let Some(link_range) = link_range {
-            if let Some(sp) = span {
-                diag.set_span(sp);
-            } else {
-                // blah blah blah\nblah\nblah [blah] blah blah\nblah blah
-                //                       ^     ~~~~
-                //                       |     link_range
-                //                       last_new_line_offset
-                let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1);
-                let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
-
-                // Print the line containing the `link_range` and manually mark it with '^'s.
-                diag.note(&format!(
-                    "the link appears in this line:\n\n{line}\n\
+        if let Some(sp) = span {
+            diag.set_span(sp);
+        } else {
+            // blah blah blah\nblah\nblah [blah] blah blah\nblah blah
+            //                       ^     ~~~~
+            //                       |     link_range
+            //                       last_new_line_offset
+            let last_new_line_offset = dox[..link_range.start].rfind('\n').map_or(0, |n| n + 1);
+            let line = dox[last_new_line_offset..].lines().next().unwrap_or("");
+
+            // Print the line containing the `link_range` and manually mark it with '^'s.
+            diag.note(&format!(
+                "the link appears in this line:\n\n{line}\n\
                      {indicator: <before$}{indicator:^<found$}",
-                    line = line,
-                    indicator = "",
-                    before = link_range.start - last_new_line_offset,
-                    found = link_range.len(),
-                ));
-            }
+                line = line,
+                indicator = "",
+                before = link_range.start - last_new_line_offset,
+                found = link_range.len(),
+            ));
         }
 
         decorate(&mut diag, span);
@@ -1668,7 +1664,7 @@ fn resolution_failure(
     path_str: &str,
     disambiguator: Option<Disambiguator>,
     dox: &str,
-    link_range: Option<Range<usize>>,
+    link_range: Range<usize>,
     kinds: SmallVec<[ResolutionFailure<'_>; 3]>,
 ) {
     let tcx = collector.cx.tcx;
@@ -1892,7 +1888,7 @@ fn anchor_failure(
     item: &Item,
     path_str: &str,
     dox: &str,
-    link_range: Option<Range<usize>>,
+    link_range: Range<usize>,
     failure: AnchorFailure,
 ) {
     let msg = match failure {
@@ -1917,7 +1913,7 @@ fn ambiguity_error(
     item: &Item,
     path_str: &str,
     dox: &str,
-    link_range: Option<Range<usize>>,
+    link_range: Range<usize>,
     candidates: Vec<Res>,
 ) {
     let mut msg = format!("`{}` is ", path_str);
@@ -1966,13 +1962,12 @@ fn suggest_disambiguator(
     path_str: &str,
     dox: &str,
     sp: Option<rustc_span::Span>,
-    link_range: &Option<Range<usize>>,
+    link_range: &Range<usize>,
 ) {
     let suggestion = disambiguator.suggestion();
     let help = format!("to link to the {}, {}", disambiguator.descr(), suggestion.descr());
 
     if let Some(sp) = sp {
-        let link_range = link_range.as_ref().expect("must have a link range if we have a span");
         let msg = if dox.bytes().nth(link_range.start) == Some(b'`') {
             format!("`{}`", suggestion.as_help(path_str))
         } else {
@@ -1991,7 +1986,7 @@ fn privacy_error(
     item: &Item,
     path_str: &str,
     dox: &str,
-    link_range: Option<Range<usize>>,
+    link_range: Range<usize>,
 ) {
     let sym;
     let item_name = match item.name {