about summary refs log tree commit diff
diff options
context:
space:
mode:
authorCamelid <camelidcamel@gmail.com>2020-10-08 22:24:34 -0700
committerCamelid <camelidcamel@gmail.com>2020-10-08 22:24:34 -0700
commit4c765f66a4f7921b4a47ab21f699bb2d8290b44a (patch)
tree17cee6f4beb603a0125fd5e63cf08b57f60e5226
parent9ba1d21868968e1a4cbbe953371afbd43ad07c72 (diff)
downloadrust-4c765f66a4f7921b4a47ab21f699bb2d8290b44a.tar.gz
rust-4c765f66a4f7921b4a47ab21f699bb2d8290b44a.zip
Allow generic parameters in intra-doc links
The contents of the generics will be mostly ignored (except for warning
if fully-qualified syntax is used, which is currently unsupported in
intra-doc links - see issue #74563).

* Allow links like `Vec<T>`, `Result<T, E>`, and `Option<Box<T>>`
* Allow links like `Vec::<T>::new()`
* Warn on
  * Unbalanced angle brackets (e.g. `Vec<T` or `Vec<T>>`)
  * Missing type to apply generics to (`<T>` or `<Box<T>>`)
  * Use of fully-qualified syntax (`<Vec as IntoIterator>::into_iter`)
  * Invalid path separator (`Vec:<T>:new`)
  * Too many angle brackets (`Vec<<T>>`)
  * Empty angle brackets (`Vec<>`)

Note that this implementation *does* allow some constructs that aren't
valid in the actual Rust syntax, for example `Box::<T>new()`. That may
not be supported in rustdoc in the future; it is an implementation
detail.
-rw-r--r--src/librustdoc/lib.rs2
-rw-r--r--src/librustdoc/passes/collect_intra_doc_links.rs203
-rw-r--r--src/test/rustdoc-ui/intra-link-errors.rs2
-rw-r--r--src/test/rustdoc-ui/intra-link-malformed-generics.rs19
-rw-r--r--src/test/rustdoc-ui/intra-link-malformed-generics.stderr102
-rw-r--r--src/test/rustdoc/intra-doc-link-generic-params.rs55
6 files changed, 381 insertions, 2 deletions
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs
index 7762e8f8d4f..12726d2bd9a 100644
--- a/src/librustdoc/lib.rs
+++ b/src/librustdoc/lib.rs
@@ -3,11 +3,13 @@
     html_playground_url = "https://play.rust-lang.org/"
 )]
 #![feature(rustc_private)]
+#![feature(array_methods)]
 #![feature(box_patterns)]
 #![feature(box_syntax)]
 #![feature(in_band_lifetimes)]
 #![feature(nll)]
 #![feature(or_patterns)]
+#![feature(peekable_next_if)]
 #![feature(test)]
 #![feature(crate_visibility_modifier)]
 #![feature(never_type)]
diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs
index b9be3e2f92b..33fa5a5033b 100644
--- a/src/librustdoc/passes/collect_intra_doc_links.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links.rs
@@ -23,6 +23,7 @@ use smallvec::{smallvec, SmallVec};
 
 use std::borrow::Cow;
 use std::cell::Cell;
+use std::mem;
 use std::ops::Range;
 
 use crate::clean::*;
@@ -65,10 +66,53 @@ enum ResolutionFailure<'a> {
     NotResolved { module_id: DefId, partial_res: Option<Res>, unresolved: Cow<'a, str> },
     /// should not ever happen
     NoParentItem,
+    /// This link has malformed generic parameters; e.g., the angle brackets are unbalanced.
+    MalformedGenerics(MalformedGenerics),
     /// used to communicate that this should be ignored, but shouldn't be reported to the user
     Dummy,
 }
 
+#[derive(Debug)]
+enum MalformedGenerics {
+    /// This link has unbalanced angle brackets.
+    ///
+    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
+    UnbalancedAngleBrackets,
+    /// The generics are not attached to a type.
+    ///
+    /// For example, `<T>` should trigger this.
+    ///
+    /// This is detected by checking if the path is empty after the generics are stripped.
+    MissingType,
+    /// The link uses fully-qualified syntax, which is currently unsupported.
+    ///
+    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
+    ///
+    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
+    /// angle brackets.
+    HasFullyQualifiedSyntax,
+    /// The link has an invalid path separator.
+    ///
+    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
+    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
+    /// called.
+    ///
+    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
+    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
+    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
+    ///
+    /// This is detected by checking if there is a colon followed by a non-colon in the link.
+    InvalidPathSeparator,
+    /// The link has too many angle brackets.
+    ///
+    /// For example, `Vec<<T>>` should trigger this.
+    TooManyAngleBrackets,
+    /// The link has empty angle brackets.
+    ///
+    /// For example, `Vec<>` should trigger this.
+    EmptyAngleBrackets,
+}
+
 impl ResolutionFailure<'a> {
     // This resolved fully (not just partially) but is erroneous for some other reason
     fn full_res(&self) -> Option<Res> {
@@ -912,6 +956,7 @@ impl LinkCollector<'_, '_> {
         let link_text;
         let mut path_str;
         let disambiguator;
+        let stripped_path_string;
         let (mut res, mut fragment) = {
             path_str = if let Ok((d, path)) = Disambiguator::from_str(&link) {
                 disambiguator = Some(d);
@@ -922,7 +967,7 @@ impl LinkCollector<'_, '_> {
             }
             .trim();
 
-            if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ch == ':' || ch == '_')) {
+            if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, ".contains(ch))) {
                 return None;
             }
 
@@ -985,6 +1030,36 @@ impl LinkCollector<'_, '_> {
                 module_id = DefId { krate, index: CRATE_DEF_INDEX };
             }
 
+            // Strip generics from the path.
+            if path_str.contains(['<', '>'].as_slice()) {
+                stripped_path_string = match strip_generics_from_path(path_str) {
+                    Ok(path) => path,
+                    Err(err_kind) => {
+                        debug!("link has malformed generics: {}", path_str);
+                        resolution_failure(
+                            self,
+                            &item,
+                            path_str,
+                            disambiguator,
+                            dox,
+                            link_range,
+                            smallvec![err_kind],
+                        );
+                        return None;
+                    }
+                };
+                path_str = &stripped_path_string;
+            }
+
+            // Sanity check to make sure we don't have any angle brackets after stripping generics.
+            assert!(!path_str.contains(['<', '>'].as_slice()));
+
+            // The link is not an intra-doc link if it still contains commas or spaces after
+            // stripping generics.
+            if path_str.contains([',', ' '].as_slice()) {
+                return None;
+            }
+
             match self.resolve_with_disambiguator(
                 disambiguator,
                 item,
@@ -1718,6 +1793,27 @@ fn resolution_failure(
                         diag.level = rustc_errors::Level::Bug;
                         "all intra doc links should have a parent item".to_owned()
                     }
+                    ResolutionFailure::MalformedGenerics(variant) => match variant {
+                        MalformedGenerics::UnbalancedAngleBrackets => {
+                            String::from("unbalanced angle brackets")
+                        }
+                        MalformedGenerics::MissingType => {
+                            String::from("missing type for generic parameters")
+                        }
+                        MalformedGenerics::HasFullyQualifiedSyntax => {
+                            diag.note("see https://github.com/rust-lang/rust/issues/74563 for more information");
+                            String::from("fully-qualified syntax is unsupported")
+                        }
+                        MalformedGenerics::InvalidPathSeparator => {
+                            String::from("has invalid path separator")
+                        }
+                        MalformedGenerics::TooManyAngleBrackets => {
+                            String::from("too many angle brackets")
+                        }
+                        MalformedGenerics::EmptyAngleBrackets => {
+                            String::from("empty angle brackets")
+                        }
+                    },
                 };
                 if let Some(span) = sp {
                     diag.span_label(span, &note);
@@ -1908,3 +2004,108 @@ fn is_primitive(path_str: &str, ns: Namespace) -> Option<(&'static str, Res)> {
 fn primitive_impl(cx: &DocContext<'_>, path_str: &str) -> Option<&'static SmallVec<[DefId; 4]>> {
     Some(PrimitiveType::from_symbol(Symbol::intern(path_str))?.impls(cx.tcx))
 }
+
+fn strip_generics_from_path(path_str: &str) -> Result<String, ResolutionFailure<'static>> {
+    let mut stripped_segments = vec![];
+    let mut path = path_str.chars().peekable();
+    let mut segment = Vec::new();
+
+    while let Some(chr) = path.next() {
+        match chr {
+            ':' => {
+                if path.next_if_eq(&':').is_some() {
+                    let stripped_segment =
+                        strip_generics_from_path_segment(mem::take(&mut segment))?;
+                    if !stripped_segment.is_empty() {
+                        stripped_segments.push(stripped_segment);
+                    }
+                } else {
+                    return Err(ResolutionFailure::MalformedGenerics(
+                        MalformedGenerics::InvalidPathSeparator,
+                    ));
+                }
+            }
+            '<' => {
+                segment.push(chr);
+
+                match path.peek() {
+                    Some('<') => {
+                        return Err(ResolutionFailure::MalformedGenerics(
+                            MalformedGenerics::TooManyAngleBrackets,
+                        ));
+                    }
+                    Some('>') => {
+                        return Err(ResolutionFailure::MalformedGenerics(
+                            MalformedGenerics::EmptyAngleBrackets,
+                        ));
+                    }
+                    Some(_) => {
+                        segment.push(path.next().unwrap());
+
+                        while let Some(chr) = path.next_if(|c| *c != '>') {
+                            segment.push(chr);
+                        }
+                    }
+                    None => break,
+                }
+            }
+            _ => segment.push(chr),
+        }
+        debug!("raw segment: {:?}", segment);
+    }
+
+    if !segment.is_empty() {
+        let stripped_segment = strip_generics_from_path_segment(segment)?;
+        if !stripped_segment.is_empty() {
+            stripped_segments.push(stripped_segment);
+        }
+    }
+
+    debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
+
+    let stripped_path = stripped_segments.join("::");
+
+    if !stripped_path.is_empty() {
+        Ok(stripped_path)
+    } else {
+        Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::MissingType))
+    }
+}
+
+fn strip_generics_from_path_segment(
+    segment: Vec<char>,
+) -> Result<String, ResolutionFailure<'static>> {
+    let mut stripped_segment = String::new();
+    let mut param_depth = 0;
+
+    let mut latest_generics_chunk = String::new();
+
+    for c in segment {
+        if c == '<' {
+            param_depth += 1;
+            latest_generics_chunk.clear();
+        } else if c == '>' {
+            param_depth -= 1;
+            if latest_generics_chunk.contains(" as ") {
+                // The segment tries to use fully-qualified syntax, which is currently unsupported.
+                // Give a helpful error message instead of completely ignoring the angle brackets.
+                return Err(ResolutionFailure::MalformedGenerics(
+                    MalformedGenerics::HasFullyQualifiedSyntax,
+                ));
+            }
+        } else {
+            if param_depth == 0 {
+                stripped_segment.push(c);
+            } else {
+                latest_generics_chunk.push(c);
+            }
+        }
+    }
+
+    if param_depth == 0 {
+        Ok(stripped_segment)
+    } else {
+        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
+        Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::UnbalancedAngleBrackets))
+    }
+}
diff --git a/src/test/rustdoc-ui/intra-link-errors.rs b/src/test/rustdoc-ui/intra-link-errors.rs
index ef928ae02f3..81e42643ae8 100644
--- a/src/test/rustdoc-ui/intra-link-errors.rs
+++ b/src/test/rustdoc-ui/intra-link-errors.rs
@@ -2,7 +2,7 @@
 //~^ NOTE lint level is defined
 
 // FIXME: this should say that it was skipped (maybe an allowed by default lint?)
-/// [<invalid syntax>]
+/// [invalid intra-doc syntax!!]
 
 /// [path::to::nonexistent::module]
 //~^ ERROR unresolved link
diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.rs b/src/test/rustdoc-ui/intra-link-malformed-generics.rs
new file mode 100644
index 00000000000..9c54092146f
--- /dev/null
+++ b/src/test/rustdoc-ui/intra-link-malformed-generics.rs
@@ -0,0 +1,19 @@
+#![deny(broken_intra_doc_links)]
+
+//! [Vec<] //~ ERROR
+//! [Vec<Box<T] //~ ERROR
+//! [Vec<Box<T>] //~ ERROR
+//! [Vec<Box<T>>>] //~ ERROR
+//! [Vec<T>>>] //~ ERROR
+//! [<Vec] //~ ERROR
+//! [Vec::<] //~ ERROR
+//! [<T>] //~ ERROR
+//! [<invalid syntax>] //~ ERROR
+//! [Vec:<T>:new()] //~ ERROR
+//! [Vec<<T>>] //~ ERROR
+//! [Vec<>] //~ ERROR
+//! [Vec<<>>] //~ ERROR
+
+// FIXME(#74563) support UFCS
+//! [<Vec as IntoIterator>::into_iter] //~ ERROR
+//! [<Vec<T> as IntoIterator>::iter] //~ ERROR
diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.stderr b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr
new file mode 100644
index 00000000000..fe5d4cd1bbf
--- /dev/null
+++ b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr
@@ -0,0 +1,102 @@
+error: unresolved link to `Vec<`
+  --> $DIR/intra-link-malformed-generics.rs:3:6
+   |
+LL | //! [Vec<]
+   |      ^^^^ unbalanced angle brackets
+   |
+note: the lint level is defined here
+  --> $DIR/intra-link-malformed-generics.rs:1:9
+   |
+LL | #![deny(broken_intra_doc_links)]
+   |         ^^^^^^^^^^^^^^^^^^^^^^
+
+error: unresolved link to `Vec<Box<T`
+  --> $DIR/intra-link-malformed-generics.rs:4:6
+   |
+LL | //! [Vec<Box<T]
+   |      ^^^^^^^^^ unbalanced angle brackets
+
+error: unresolved link to `Vec<Box<T>`
+  --> $DIR/intra-link-malformed-generics.rs:5:6
+   |
+LL | //! [Vec<Box<T>]
+   |      ^^^^^^^^^^ unbalanced angle brackets
+
+error: unresolved link to `Vec<Box<T>>>`
+  --> $DIR/intra-link-malformed-generics.rs:6:6
+   |
+LL | //! [Vec<Box<T>>>]
+   |      ^^^^^^^^^^^^ unbalanced angle brackets
+
+error: unresolved link to `Vec<T>>>`
+  --> $DIR/intra-link-malformed-generics.rs:7:6
+   |
+LL | //! [Vec<T>>>]
+   |      ^^^^^^^^ unbalanced angle brackets
+
+error: unresolved link to `<Vec`
+  --> $DIR/intra-link-malformed-generics.rs:8:6
+   |
+LL | //! [<Vec]
+   |      ^^^^ unbalanced angle brackets
+
+error: unresolved link to `Vec::<`
+  --> $DIR/intra-link-malformed-generics.rs:9:6
+   |
+LL | //! [Vec::<]
+   |      ^^^^^^ unbalanced angle brackets
+
+error: unresolved link to `<T>`
+  --> $DIR/intra-link-malformed-generics.rs:10:6
+   |
+LL | //! [<T>]
+   |      ^^^ missing type for generic parameters
+
+error: unresolved link to `<invalid syntax>`
+  --> $DIR/intra-link-malformed-generics.rs:11:6
+   |
+LL | //! [<invalid syntax>]
+   |      ^^^^^^^^^^^^^^^^ missing type for generic parameters
+
+error: unresolved link to `Vec:<T>:new`
+  --> $DIR/intra-link-malformed-generics.rs:12:6
+   |
+LL | //! [Vec:<T>:new()]
+   |      ^^^^^^^^^^^^^ has invalid path separator
+
+error: unresolved link to `Vec<<T>>`
+  --> $DIR/intra-link-malformed-generics.rs:13:6
+   |
+LL | //! [Vec<<T>>]
+   |      ^^^^^^^^ too many angle brackets
+
+error: unresolved link to `Vec<>`
+  --> $DIR/intra-link-malformed-generics.rs:14:6
+   |
+LL | //! [Vec<>]
+   |      ^^^^^ empty angle brackets
+
+error: unresolved link to `Vec<<>>`
+  --> $DIR/intra-link-malformed-generics.rs:15:6
+   |
+LL | //! [Vec<<>>]
+   |      ^^^^^^^ too many angle brackets
+
+error: unresolved link to `<Vec as IntoIterator>::into_iter`
+  --> $DIR/intra-link-malformed-generics.rs:18:6
+   |
+LL | //! [<Vec as IntoIterator>::into_iter]
+   |      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported
+   |
+   = note: see https://github.com/rust-lang/rust/issues/74563 for more information
+
+error: unresolved link to `<Vec<T> as IntoIterator>::iter`
+  --> $DIR/intra-link-malformed-generics.rs:19:6
+   |
+LL | //! [<Vec<T> as IntoIterator>::iter]
+   |      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported
+   |
+   = note: see https://github.com/rust-lang/rust/issues/74563 for more information
+
+error: aborting due to 15 previous errors
+
diff --git a/src/test/rustdoc/intra-doc-link-generic-params.rs b/src/test/rustdoc/intra-doc-link-generic-params.rs
new file mode 100644
index 00000000000..06735752974
--- /dev/null
+++ b/src/test/rustdoc/intra-doc-link-generic-params.rs
@@ -0,0 +1,55 @@
+// ignore-tidy-linelength
+
+#![crate_name = "foo"]
+
+//! Here's a link to [`Vec<T>`] and one to [`Box<Vec<Option<T>>>`].
+//! Here's a link to [`Iterator<Box<T>>::Item`].
+//!
+//! And what about a link to [just `Option`](Option) and, [with the generic, `Option<T>`](Option<T>)?
+//!
+//! We should also try linking to [`Result<T, E>`]; it has *two* generics!
+//!
+//! Now let's test a trickier case: [`Vec::<T>::new`], or you could write it
+//! [with parentheses as `Vec::<T>::new()`][Vec::<T>::new()].
+//! And what about something even harder? That would be [`Vec::<Box<T>>::new()`].
+//!
+//! This is also pretty tricky: [`TypeId::of::<String>()`].
+//! And this too: [`Vec::<std::error::Error>::len`].
+//!
+//! We unofficially and implicitly support things that aren't valid in the actual Rust syntax, like
+//! [`Box::<T>new()`]. We may not support them in the future!
+//!
+//! These will be resolved as regular links:
+//! - [`this is <invalid syntax> first`](https://www.rust-lang.org)
+//! - [`this is <invalid syntax> twice`]
+//! - [`<invalid syntax> thrice`](https://www.rust-lang.org)
+//! - [`<invalid syntax> four times`][rlo]
+//! - [a < b][rlo]
+//! - [c > d]
+//!
+//! [`this is <invalid syntax> twice`]: https://www.rust-lang.org
+//! [rlo]: https://www.rust-lang.org
+//! [c > d]: https://www.rust-lang.org
+
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html"]' 'Vec<T>'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html"]' 'Box<Vec<Option<T>>>'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/iter/traits/iterator/trait.Iterator.html#associatedtype.Item"]' 'Iterator<Box<T>>::Item'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'just Option'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'with the generic, Option<T>'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/result/enum.Result.html"]' 'Result<T, E>'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<T>::new'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'with parentheses as Vec::<T>::new()'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<Box<T>>::new()'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/any/struct.TypeId.html#method.of"]' 'TypeId::of::<String>()'
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.len"]' 'Vec::<std::error::Error>::len'
+
+// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html#method.new"]' 'Box::<T>new()'
+
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> first'
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> twice'
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> thrice'
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> four times'
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'a < b'
+// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'c > d'
+
+use std::any::TypeId;