about summary refs log tree commit diff
path: root/compiler/rustc_resolve/src/rustdoc.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_resolve/src/rustdoc.rs')
-rw-r--r--compiler/rustc_resolve/src/rustdoc.rs552
1 files changed, 552 insertions, 0 deletions
diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs
new file mode 100644
index 00000000000..4ff4ccf5e98
--- /dev/null
+++ b/compiler/rustc_resolve/src/rustdoc.rs
@@ -0,0 +1,552 @@
+use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
+use rustc_ast as ast;
+use rustc_ast::util::comments::beautify_doc_string;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_middle::ty::TyCtxt;
+use rustc_span::def_id::DefId;
+use rustc_span::symbol::{kw, sym, Symbol};
+use rustc_span::{InnerSpan, Span, DUMMY_SP};
+use std::ops::Range;
+use std::{cmp, mem};
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum DocFragmentKind {
+    /// A doc fragment created from a `///` or `//!` doc comment.
+    SugaredDoc,
+    /// A doc fragment created from a "raw" `#[doc=""]` attribute.
+    RawDoc,
+}
+
+/// A portion of documentation, extracted from a `#[doc]` attribute.
+///
+/// Each variant contains the line number within the complete doc-comment where the fragment
+/// starts, as well as the Span where the corresponding doc comment or attribute is located.
+///
+/// Included files are kept separate from inline doc comments so that proper line-number
+/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
+/// kept separate because of issue #42760.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct DocFragment {
+    pub span: Span,
+    /// The item this doc-comment came from.
+    /// Used to determine the scope in which doc links in this fragment are resolved.
+    /// Typically filled for reexport docs when they are merged into the docs of the
+    /// original reexported item.
+    /// If the id is not filled, which happens for the original reexported item, then
+    /// it has to be taken from somewhere else during doc link resolution.
+    pub item_id: Option<DefId>,
+    pub doc: Symbol,
+    pub kind: DocFragmentKind,
+    pub indent: usize,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum MalformedGenerics {
+    /// This link has unbalanced angle brackets.
+    ///
+    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
+    UnbalancedAngleBrackets,
+    /// The generics are not attached to a type.
+    ///
+    /// For example, `<T>` should trigger this.
+    ///
+    /// This is detected by checking if the path is empty after the generics are stripped.
+    MissingType,
+    /// The link uses fully-qualified syntax, which is currently unsupported.
+    ///
+    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
+    ///
+    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
+    /// angle brackets.
+    HasFullyQualifiedSyntax,
+    /// The link has an invalid path separator.
+    ///
+    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
+    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
+    /// called.
+    ///
+    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
+    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
+    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
+    ///
+    /// This is detected by checking if there is a colon followed by a non-colon in the link.
+    InvalidPathSeparator,
+    /// The link has too many angle brackets.
+    ///
+    /// For example, `Vec<<T>>` should trigger this.
+    TooManyAngleBrackets,
+    /// The link has empty angle brackets.
+    ///
+    /// For example, `Vec<>` should trigger this.
+    EmptyAngleBrackets,
+}
+
+/// Removes excess indentation on comments in order for the Markdown
+/// to be parsed correctly. This is necessary because the convention for
+/// writing documentation is to provide a space between the /// or //! marker
+/// and the doc text, but Markdown is whitespace-sensitive. For example,
+/// a block of text with four-space indentation is parsed as a code block,
+/// so if we didn't unindent comments, these list items
+///
+/// /// A list:
+/// ///
+/// ///    - Foo
+/// ///    - Bar
+///
+/// would be parsed as if they were in a code block, which is likely not what the user intended.
+pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
+    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
+    // fragments kind's lines are never starting with a whitespace unless they are using some
+    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
+    // we need to take into account the fact that the minimum indent minus one (to take this
+    // whitespace into account).
+    //
+    // For example:
+    //
+    // /// hello!
+    // #[doc = "another"]
+    //
+    // In this case, you want "hello! another" and not "hello!  another".
+    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
+        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
+    {
+        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
+        // "decide" how much the minimum indent will be.
+        1
+    } else {
+        0
+    };
+
+    // `min_indent` is used to know how much whitespaces from the start of each lines must be
+    // removed. Example:
+    //
+    // ///     hello!
+    // #[doc = "another"]
+    //
+    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
+    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
+    // (5 - 1) whitespaces.
+    let Some(min_indent) = docs
+        .iter()
+        .map(|fragment| {
+            fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
+                if line.chars().all(|c| c.is_whitespace()) {
+                    min_indent
+                } else {
+                    // Compare against either space or tab, ignoring whether they are
+                    // mixed or not.
+                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+                    cmp::min(min_indent, whitespace)
+                        + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
+                }
+            })
+        })
+        .min()
+    else {
+        return;
+    };
+
+    for fragment in docs {
+        if fragment.doc == kw::Empty {
+            continue;
+        }
+
+        let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
+            min_indent - add
+        } else {
+            min_indent
+        };
+
+        fragment.indent = min_indent;
+    }
+}
+
+/// The goal of this function is to apply the `DocFragment` transformation that is required when
+/// transforming into the final Markdown, which is applying the computed indent to each line in
+/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
+///
+/// Note: remove the trailing newline where appropriate
+pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
+    let s = frag.doc.as_str();
+    let mut iter = s.lines();
+    if s.is_empty() {
+        out.push('\n');
+        return;
+    }
+    while let Some(line) = iter.next() {
+        if line.chars().any(|c| !c.is_whitespace()) {
+            assert!(line.len() >= frag.indent);
+            out.push_str(&line[frag.indent..]);
+        } else {
+            out.push_str(line);
+        }
+        out.push('\n');
+    }
+}
+
+pub fn attrs_to_doc_fragments<'a>(
+    attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
+    doc_only: bool,
+) -> (Vec<DocFragment>, ast::AttrVec) {
+    let mut doc_fragments = Vec::new();
+    let mut other_attrs = ast::AttrVec::new();
+    for (attr, item_id) in attrs {
+        if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
+            let doc = beautify_doc_string(doc_str, comment_kind);
+            let kind = if attr.is_doc_comment() {
+                DocFragmentKind::SugaredDoc
+            } else {
+                DocFragmentKind::RawDoc
+            };
+            let fragment = DocFragment { span: attr.span, doc, kind, item_id, indent: 0 };
+            doc_fragments.push(fragment);
+        } else if !doc_only {
+            other_attrs.push(attr.clone());
+        }
+    }
+
+    unindent_doc_fragments(&mut doc_fragments);
+
+    (doc_fragments, other_attrs)
+}
+
+/// Return the doc-comments on this item, grouped by the module they came from.
+/// The module can be different if this is a re-export with added documentation.
+///
+/// The last newline is not trimmed so the produced strings are reusable between
+/// early and late doc link resolution regardless of their position.
+pub fn prepare_to_doc_link_resolution(
+    doc_fragments: &[DocFragment],
+) -> FxHashMap<Option<DefId>, String> {
+    let mut res = FxHashMap::default();
+    for fragment in doc_fragments {
+        let out_str = res.entry(fragment.item_id).or_default();
+        add_doc_fragment(out_str, fragment);
+    }
+    res
+}
+
+/// Options for rendering Markdown in the main body of documentation.
+pub fn main_body_opts() -> Options {
+    Options::ENABLE_TABLES
+        | Options::ENABLE_FOOTNOTES
+        | Options::ENABLE_STRIKETHROUGH
+        | Options::ENABLE_TASKLISTS
+        | Options::ENABLE_SMART_PUNCTUATION
+}
+
+fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
+    let mut stripped_segment = String::new();
+    let mut param_depth = 0;
+
+    let mut latest_generics_chunk = String::new();
+
+    for c in segment {
+        if c == '<' {
+            param_depth += 1;
+            latest_generics_chunk.clear();
+        } else if c == '>' {
+            param_depth -= 1;
+            if latest_generics_chunk.contains(" as ") {
+                // The segment tries to use fully-qualified syntax, which is currently unsupported.
+                // Give a helpful error message instead of completely ignoring the angle brackets.
+                return Err(MalformedGenerics::HasFullyQualifiedSyntax);
+            }
+        } else {
+            if param_depth == 0 {
+                stripped_segment.push(c);
+            } else {
+                latest_generics_chunk.push(c);
+            }
+        }
+    }
+
+    if param_depth == 0 {
+        Ok(stripped_segment)
+    } else {
+        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
+        Err(MalformedGenerics::UnbalancedAngleBrackets)
+    }
+}
+
+pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
+    if !path_str.contains(['<', '>']) {
+        return Ok(path_str.into());
+    }
+    let mut stripped_segments = vec![];
+    let mut path = path_str.chars().peekable();
+    let mut segment = Vec::new();
+
+    while let Some(chr) = path.next() {
+        match chr {
+            ':' => {
+                if path.next_if_eq(&':').is_some() {
+                    let stripped_segment =
+                        strip_generics_from_path_segment(mem::take(&mut segment))?;
+                    if !stripped_segment.is_empty() {
+                        stripped_segments.push(stripped_segment);
+                    }
+                } else {
+                    return Err(MalformedGenerics::InvalidPathSeparator);
+                }
+            }
+            '<' => {
+                segment.push(chr);
+
+                match path.next() {
+                    Some('<') => {
+                        return Err(MalformedGenerics::TooManyAngleBrackets);
+                    }
+                    Some('>') => {
+                        return Err(MalformedGenerics::EmptyAngleBrackets);
+                    }
+                    Some(chr) => {
+                        segment.push(chr);
+
+                        while let Some(chr) = path.next_if(|c| *c != '>') {
+                            segment.push(chr);
+                        }
+                    }
+                    None => break,
+                }
+            }
+            _ => segment.push(chr),
+        }
+        trace!("raw segment: {:?}", segment);
+    }
+
+    if !segment.is_empty() {
+        let stripped_segment = strip_generics_from_path_segment(segment)?;
+        if !stripped_segment.is_empty() {
+            stripped_segments.push(stripped_segment);
+        }
+    }
+
+    debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
+
+    let stripped_path = stripped_segments.join("::");
+
+    if !stripped_path.is_empty() {
+        Ok(stripped_path.into())
+    } else {
+        Err(MalformedGenerics::MissingType)
+    }
+}
+
+/// Returns whether the first doc-comment is an inner attribute.
+///
+//// If there are no doc-comments, return true.
+/// FIXME(#78591): Support both inner and outer attributes on the same item.
+pub fn inner_docs(attrs: &[ast::Attribute]) -> bool {
+    attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner)
+}
+
+/// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
+pub fn has_primitive_or_keyword_docs(attrs: &[ast::Attribute]) -> bool {
+    for attr in attrs {
+        if attr.has_name(sym::rustc_doc_primitive) {
+            return true;
+        } else if attr.has_name(sym::doc)
+            && let Some(items) = attr.meta_item_list()
+        {
+            for item in items {
+                if item.has_name(sym::keyword) {
+                    return true;
+                }
+            }
+        }
+    }
+    false
+}
+
+/// Simplified version of the corresponding function in rustdoc.
+/// If the rustdoc version returns a successful result, this function must return the same result.
+/// Otherwise this function may return anything.
+fn preprocess_link(link: &str) -> Box<str> {
+    let link = link.replace('`', "");
+    let link = link.split('#').next().unwrap();
+    let link = link.trim();
+    let link = link.rsplit('@').next().unwrap();
+    let link = link.strip_suffix("()").unwrap_or(link);
+    let link = link.strip_suffix("{}").unwrap_or(link);
+    let link = link.strip_suffix("[]").unwrap_or(link);
+    let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
+    let link = link.trim();
+    strip_generics_from_path(link).unwrap_or_else(|_| link.into())
+}
+
+/// Keep inline and reference links `[]`,
+/// but skip autolinks `<>` which we never consider to be intra-doc links.
+pub fn may_be_doc_link(link_type: LinkType) -> bool {
+    match link_type {
+        LinkType::Inline
+        | LinkType::Reference
+        | LinkType::ReferenceUnknown
+        | LinkType::Collapsed
+        | LinkType::CollapsedUnknown
+        | LinkType::Shortcut
+        | LinkType::ShortcutUnknown => true,
+        LinkType::Autolink | LinkType::Email => false,
+    }
+}
+
+/// Simplified version of `preprocessed_markdown_links` from rustdoc.
+/// Must return at least the same links as it, but may add some more links on top of that.
+pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> {
+    let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
+    let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
+
+    parse_links(&doc)
+}
+
+/// Similiar version of `markdown_links` from rustdoc.
+/// This will collect destination links and display text if exists.
+fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
+    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
+    let mut event_iter = Parser::new_with_broken_link_callback(
+        doc,
+        main_body_opts(),
+        Some(&mut broken_link_callback),
+    );
+    let mut links = Vec::new();
+
+    while let Some(event) = event_iter.next() {
+        match event {
+            Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
+                if matches!(
+                    link_type,
+                    LinkType::Inline
+                        | LinkType::ReferenceUnknown
+                        | LinkType::Reference
+                        | LinkType::Shortcut
+                        | LinkType::ShortcutUnknown
+                ) {
+                    if let Some(display_text) = collect_link_data(&mut event_iter) {
+                        links.push(display_text);
+                    }
+                }
+
+                links.push(preprocess_link(&dest));
+            }
+            _ => {}
+        }
+    }
+
+    links
+}
+
+/// Collects additional data of link.
+fn collect_link_data<'input, 'callback>(
+    event_iter: &mut Parser<'input, 'callback>,
+) -> Option<Box<str>> {
+    let mut display_text: Option<String> = None;
+    let mut append_text = |text: CowStr<'_>| {
+        if let Some(display_text) = &mut display_text {
+            display_text.push_str(&text);
+        } else {
+            display_text = Some(text.to_string());
+        }
+    };
+
+    while let Some(event) = event_iter.next() {
+        match event {
+            Event::Text(text) => {
+                append_text(text);
+            }
+            Event::Code(code) => {
+                append_text(code);
+            }
+            Event::End(_) => {
+                break;
+            }
+            _ => {}
+        }
+    }
+
+    display_text.map(String::into_boxed_str)
+}
+
+/// Returns a span encompassing all the document fragments.
+pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
+    if fragments.is_empty() {
+        return None;
+    }
+    let start = fragments[0].span;
+    if start == DUMMY_SP {
+        return None;
+    }
+    let end = fragments.last().expect("no doc strings provided").span;
+    Some(start.to(end))
+}
+
+/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
+///
+/// This method will return `None` if we cannot construct a span from the source map or if the
+/// fragments are not all sugared doc comments. It's difficult to calculate the correct span in
+/// that case due to escaping and other source features.
+pub fn source_span_for_markdown_range(
+    tcx: TyCtxt<'_>,
+    markdown: &str,
+    md_range: &Range<usize>,
+    fragments: &[DocFragment],
+) -> Option<Span> {
+    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
+
+    if !is_all_sugared_doc {
+        return None;
+    }
+
+    let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
+
+    let starting_line = markdown[..md_range.start].matches('\n').count();
+    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
+
+    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
+    // CRLF and LF line endings the same way.
+    let mut src_lines = snippet.split_terminator('\n');
+    let md_lines = markdown.split_terminator('\n');
+
+    // The number of bytes from the source span to the markdown span that are not part
+    // of the markdown, like comment markers.
+    let mut start_bytes = 0;
+    let mut end_bytes = 0;
+
+    'outer: for (line_no, md_line) in md_lines.enumerate() {
+        loop {
+            let source_line = src_lines.next()?;
+            match source_line.find(md_line) {
+                Some(offset) => {
+                    if line_no == starting_line {
+                        start_bytes += offset;
+
+                        if starting_line == ending_line {
+                            break 'outer;
+                        }
+                    } else if line_no == ending_line {
+                        end_bytes += offset;
+                        break 'outer;
+                    } else if line_no < starting_line {
+                        start_bytes += source_line.len() - md_line.len();
+                    } else {
+                        end_bytes += source_line.len() - md_line.len();
+                    }
+                    break;
+                }
+                None => {
+                    // Since this is a source line that doesn't include a markdown line,
+                    // we have to count the newline that we split from earlier.
+                    if line_no <= starting_line {
+                        start_bytes += source_line.len() + 1;
+                    } else {
+                        end_bytes += source_line.len() + 1;
+                    }
+                }
+            }
+        }
+    }
+
+    Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
+        md_range.start + start_bytes,
+        md_range.end + start_bytes + end_bytes,
+    )))
+}