diff options
| author | Camelid <camelidcamel@gmail.com> | 2020-10-08 22:24:34 -0700 |
|---|---|---|
| committer | Camelid <camelidcamel@gmail.com> | 2020-10-08 22:24:34 -0700 |
| commit | 4c765f66a4f7921b4a47ab21f699bb2d8290b44a (patch) | |
| tree | 17cee6f4beb603a0125fd5e63cf08b57f60e5226 | |
| parent | 9ba1d21868968e1a4cbbe953371afbd43ad07c72 (diff) | |
| download | rust-4c765f66a4f7921b4a47ab21f699bb2d8290b44a.tar.gz rust-4c765f66a4f7921b4a47ab21f699bb2d8290b44a.zip | |
Allow generic parameters in intra-doc links
The contents of the generics will be mostly ignored (except for warning if fully-qualified syntax is used, which is currently unsupported in intra-doc links - see issue #74563). * Allow links like `Vec<T>`, `Result<T, E>`, and `Option<Box<T>>` * Allow links like `Vec::<T>::new()` * Warn on * Unbalanced angle brackets (e.g. `Vec<T` or `Vec<T>>`) * Missing type to apply generics to (`<T>` or `<Box<T>>`) * Use of fully-qualified syntax (`<Vec as IntoIterator>::into_iter`) * Invalid path separator (`Vec:<T>:new`) * Too many angle brackets (`Vec<<T>>`) * Empty angle brackets (`Vec<>`) Note that this implementation *does* allow some constructs that aren't valid in the actual Rust syntax, for example `Box::<T>new()`. That may not be supported in rustdoc in the future; it is an implementation detail.
| -rw-r--r-- | src/librustdoc/lib.rs | 2 | ||||
| -rw-r--r-- | src/librustdoc/passes/collect_intra_doc_links.rs | 203 | ||||
| -rw-r--r-- | src/test/rustdoc-ui/intra-link-errors.rs | 2 | ||||
| -rw-r--r-- | src/test/rustdoc-ui/intra-link-malformed-generics.rs | 19 | ||||
| -rw-r--r-- | src/test/rustdoc-ui/intra-link-malformed-generics.stderr | 102 | ||||
| -rw-r--r-- | src/test/rustdoc/intra-doc-link-generic-params.rs | 55 |
6 files changed, 381 insertions, 2 deletions
diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 7762e8f8d4f..12726d2bd9a 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -3,11 +3,13 @@ html_playground_url = "https://play.rust-lang.org/" )] #![feature(rustc_private)] +#![feature(array_methods)] #![feature(box_patterns)] #![feature(box_syntax)] #![feature(in_band_lifetimes)] #![feature(nll)] #![feature(or_patterns)] +#![feature(peekable_next_if)] #![feature(test)] #![feature(crate_visibility_modifier)] #![feature(never_type)] diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index b9be3e2f92b..33fa5a5033b 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -23,6 +23,7 @@ use smallvec::{smallvec, SmallVec}; use std::borrow::Cow; use std::cell::Cell; +use std::mem; use std::ops::Range; use crate::clean::*; @@ -65,10 +66,53 @@ enum ResolutionFailure<'a> { NotResolved { module_id: DefId, partial_res: Option<Res>, unresolved: Cow<'a, str> }, /// should not ever happen NoParentItem, + /// This link has malformed generic parameters; e.g., the angle brackets are unbalanced. + MalformedGenerics(MalformedGenerics), /// used to communicate that this should be ignored, but shouldn't be reported to the user Dummy, } +#[derive(Debug)] +enum MalformedGenerics { + /// This link has unbalanced angle brackets. + /// + /// For example, `Vec<T` should trigger this, as should `Vec<T>>`. + UnbalancedAngleBrackets, + /// The generics are not attached to a type. + /// + /// For example, `<T>` should trigger this. + /// + /// This is detected by checking if the path is empty after the generics are stripped. + MissingType, + /// The link uses fully-qualified syntax, which is currently unsupported. + /// + /// For example, `<Vec as IntoIterator>::into_iter` should trigger this. + /// + /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside + /// angle brackets. + HasFullyQualifiedSyntax, + /// The link has an invalid path separator. + /// + /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not** + /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be + /// called. + /// + /// Note that this will also **not** be triggered if the invalid path separator is inside angle + /// brackets because rustdoc mostly ignores what's inside angle brackets (except for + /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)). + /// + /// This is detected by checking if there is a colon followed by a non-colon in the link. + InvalidPathSeparator, + /// The link has too many angle brackets. + /// + /// For example, `Vec<<T>>` should trigger this. + TooManyAngleBrackets, + /// The link has empty angle brackets. + /// + /// For example, `Vec<>` should trigger this. + EmptyAngleBrackets, +} + impl ResolutionFailure<'a> { // This resolved fully (not just partially) but is erroneous for some other reason fn full_res(&self) -> Option<Res> { @@ -912,6 +956,7 @@ impl LinkCollector<'_, '_> { let link_text; let mut path_str; let disambiguator; + let stripped_path_string; let (mut res, mut fragment) = { path_str = if let Ok((d, path)) = Disambiguator::from_str(&link) { disambiguator = Some(d); @@ -922,7 +967,7 @@ impl LinkCollector<'_, '_> { } .trim(); - if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ch == ':' || ch == '_')) { + if path_str.contains(|ch: char| !(ch.is_alphanumeric() || ":_<>, ".contains(ch))) { return None; } @@ -985,6 +1030,36 @@ impl LinkCollector<'_, '_> { module_id = DefId { krate, index: CRATE_DEF_INDEX }; } + // Strip generics from the path. + if path_str.contains(['<', '>'].as_slice()) { + stripped_path_string = match strip_generics_from_path(path_str) { + Ok(path) => path, + Err(err_kind) => { + debug!("link has malformed generics: {}", path_str); + resolution_failure( + self, + &item, + path_str, + disambiguator, + dox, + link_range, + smallvec![err_kind], + ); + return None; + } + }; + path_str = &stripped_path_string; + } + + // Sanity check to make sure we don't have any angle brackets after stripping generics. + assert!(!path_str.contains(['<', '>'].as_slice())); + + // The link is not an intra-doc link if it still contains commas or spaces after + // stripping generics. + if path_str.contains([',', ' '].as_slice()) { + return None; + } + match self.resolve_with_disambiguator( disambiguator, item, @@ -1718,6 +1793,27 @@ fn resolution_failure( diag.level = rustc_errors::Level::Bug; "all intra doc links should have a parent item".to_owned() } + ResolutionFailure::MalformedGenerics(variant) => match variant { + MalformedGenerics::UnbalancedAngleBrackets => { + String::from("unbalanced angle brackets") + } + MalformedGenerics::MissingType => { + String::from("missing type for generic parameters") + } + MalformedGenerics::HasFullyQualifiedSyntax => { + diag.note("see https://github.com/rust-lang/rust/issues/74563 for more information"); + String::from("fully-qualified syntax is unsupported") + } + MalformedGenerics::InvalidPathSeparator => { + String::from("has invalid path separator") + } + MalformedGenerics::TooManyAngleBrackets => { + String::from("too many angle brackets") + } + MalformedGenerics::EmptyAngleBrackets => { + String::from("empty angle brackets") + } + }, }; if let Some(span) = sp { diag.span_label(span, ¬e); @@ -1908,3 +2004,108 @@ fn is_primitive(path_str: &str, ns: Namespace) -> Option<(&'static str, Res)> { fn primitive_impl(cx: &DocContext<'_>, path_str: &str) -> Option<&'static SmallVec<[DefId; 4]>> { Some(PrimitiveType::from_symbol(Symbol::intern(path_str))?.impls(cx.tcx)) } + +fn strip_generics_from_path(path_str: &str) -> Result<String, ResolutionFailure<'static>> { + let mut stripped_segments = vec![]; + let mut path = path_str.chars().peekable(); + let mut segment = Vec::new(); + + while let Some(chr) = path.next() { + match chr { + ':' => { + if path.next_if_eq(&':').is_some() { + let stripped_segment = + strip_generics_from_path_segment(mem::take(&mut segment))?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } else { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::InvalidPathSeparator, + )); + } + } + '<' => { + segment.push(chr); + + match path.peek() { + Some('<') => { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::TooManyAngleBrackets, + )); + } + Some('>') => { + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::EmptyAngleBrackets, + )); + } + Some(_) => { + segment.push(path.next().unwrap()); + + while let Some(chr) = path.next_if(|c| *c != '>') { + segment.push(chr); + } + } + None => break, + } + } + _ => segment.push(chr), + } + debug!("raw segment: {:?}", segment); + } + + if !segment.is_empty() { + let stripped_segment = strip_generics_from_path_segment(segment)?; + if !stripped_segment.is_empty() { + stripped_segments.push(stripped_segment); + } + } + + debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments); + + let stripped_path = stripped_segments.join("::"); + + if !stripped_path.is_empty() { + Ok(stripped_path) + } else { + Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::MissingType)) + } +} + +fn strip_generics_from_path_segment( + segment: Vec<char>, +) -> Result<String, ResolutionFailure<'static>> { + let mut stripped_segment = String::new(); + let mut param_depth = 0; + + let mut latest_generics_chunk = String::new(); + + for c in segment { + if c == '<' { + param_depth += 1; + latest_generics_chunk.clear(); + } else if c == '>' { + param_depth -= 1; + if latest_generics_chunk.contains(" as ") { + // The segment tries to use fully-qualified syntax, which is currently unsupported. + // Give a helpful error message instead of completely ignoring the angle brackets. + return Err(ResolutionFailure::MalformedGenerics( + MalformedGenerics::HasFullyQualifiedSyntax, + )); + } + } else { + if param_depth == 0 { + stripped_segment.push(c); + } else { + latest_generics_chunk.push(c); + } + } + } + + if param_depth == 0 { + Ok(stripped_segment) + } else { + // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>` + Err(ResolutionFailure::MalformedGenerics(MalformedGenerics::UnbalancedAngleBrackets)) + } +} diff --git a/src/test/rustdoc-ui/intra-link-errors.rs b/src/test/rustdoc-ui/intra-link-errors.rs index ef928ae02f3..81e42643ae8 100644 --- a/src/test/rustdoc-ui/intra-link-errors.rs +++ b/src/test/rustdoc-ui/intra-link-errors.rs @@ -2,7 +2,7 @@ //~^ NOTE lint level is defined // FIXME: this should say that it was skipped (maybe an allowed by default lint?) -/// [<invalid syntax>] +/// [invalid intra-doc syntax!!] /// [path::to::nonexistent::module] //~^ ERROR unresolved link diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.rs b/src/test/rustdoc-ui/intra-link-malformed-generics.rs new file mode 100644 index 00000000000..9c54092146f --- /dev/null +++ b/src/test/rustdoc-ui/intra-link-malformed-generics.rs @@ -0,0 +1,19 @@ +#![deny(broken_intra_doc_links)] + +//! [Vec<] //~ ERROR +//! [Vec<Box<T] //~ ERROR +//! [Vec<Box<T>] //~ ERROR +//! [Vec<Box<T>>>] //~ ERROR +//! [Vec<T>>>] //~ ERROR +//! [<Vec] //~ ERROR +//! [Vec::<] //~ ERROR +//! [<T>] //~ ERROR +//! [<invalid syntax>] //~ ERROR +//! [Vec:<T>:new()] //~ ERROR +//! [Vec<<T>>] //~ ERROR +//! [Vec<>] //~ ERROR +//! [Vec<<>>] //~ ERROR + +// FIXME(#74563) support UFCS +//! [<Vec as IntoIterator>::into_iter] //~ ERROR +//! [<Vec<T> as IntoIterator>::iter] //~ ERROR diff --git a/src/test/rustdoc-ui/intra-link-malformed-generics.stderr b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr new file mode 100644 index 00000000000..fe5d4cd1bbf --- /dev/null +++ b/src/test/rustdoc-ui/intra-link-malformed-generics.stderr @@ -0,0 +1,102 @@ +error: unresolved link to `Vec<` + --> $DIR/intra-link-malformed-generics.rs:3:6 + | +LL | //! [Vec<] + | ^^^^ unbalanced angle brackets + | +note: the lint level is defined here + --> $DIR/intra-link-malformed-generics.rs:1:9 + | +LL | #![deny(broken_intra_doc_links)] + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: unresolved link to `Vec<Box<T` + --> $DIR/intra-link-malformed-generics.rs:4:6 + | +LL | //! [Vec<Box<T] + | ^^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `Vec<Box<T>` + --> $DIR/intra-link-malformed-generics.rs:5:6 + | +LL | //! [Vec<Box<T>] + | ^^^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `Vec<Box<T>>>` + --> $DIR/intra-link-malformed-generics.rs:6:6 + | +LL | //! [Vec<Box<T>>>] + | ^^^^^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `Vec<T>>>` + --> $DIR/intra-link-malformed-generics.rs:7:6 + | +LL | //! [Vec<T>>>] + | ^^^^^^^^ unbalanced angle brackets + +error: unresolved link to `<Vec` + --> $DIR/intra-link-malformed-generics.rs:8:6 + | +LL | //! [<Vec] + | ^^^^ unbalanced angle brackets + +error: unresolved link to `Vec::<` + --> $DIR/intra-link-malformed-generics.rs:9:6 + | +LL | //! [Vec::<] + | ^^^^^^ unbalanced angle brackets + +error: unresolved link to `<T>` + --> $DIR/intra-link-malformed-generics.rs:10:6 + | +LL | //! [<T>] + | ^^^ missing type for generic parameters + +error: unresolved link to `<invalid syntax>` + --> $DIR/intra-link-malformed-generics.rs:11:6 + | +LL | //! [<invalid syntax>] + | ^^^^^^^^^^^^^^^^ missing type for generic parameters + +error: unresolved link to `Vec:<T>:new` + --> $DIR/intra-link-malformed-generics.rs:12:6 + | +LL | //! [Vec:<T>:new()] + | ^^^^^^^^^^^^^ has invalid path separator + +error: unresolved link to `Vec<<T>>` + --> $DIR/intra-link-malformed-generics.rs:13:6 + | +LL | //! [Vec<<T>>] + | ^^^^^^^^ too many angle brackets + +error: unresolved link to `Vec<>` + --> $DIR/intra-link-malformed-generics.rs:14:6 + | +LL | //! [Vec<>] + | ^^^^^ empty angle brackets + +error: unresolved link to `Vec<<>>` + --> $DIR/intra-link-malformed-generics.rs:15:6 + | +LL | //! [Vec<<>>] + | ^^^^^^^ too many angle brackets + +error: unresolved link to `<Vec as IntoIterator>::into_iter` + --> $DIR/intra-link-malformed-generics.rs:18:6 + | +LL | //! [<Vec as IntoIterator>::into_iter] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported + | + = note: see https://github.com/rust-lang/rust/issues/74563 for more information + +error: unresolved link to `<Vec<T> as IntoIterator>::iter` + --> $DIR/intra-link-malformed-generics.rs:19:6 + | +LL | //! [<Vec<T> as IntoIterator>::iter] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ fully-qualified syntax is unsupported + | + = note: see https://github.com/rust-lang/rust/issues/74563 for more information + +error: aborting due to 15 previous errors + diff --git a/src/test/rustdoc/intra-doc-link-generic-params.rs b/src/test/rustdoc/intra-doc-link-generic-params.rs new file mode 100644 index 00000000000..06735752974 --- /dev/null +++ b/src/test/rustdoc/intra-doc-link-generic-params.rs @@ -0,0 +1,55 @@ +// ignore-tidy-linelength + +#![crate_name = "foo"] + +//! Here's a link to [`Vec<T>`] and one to [`Box<Vec<Option<T>>>`]. +//! Here's a link to [`Iterator<Box<T>>::Item`]. +//! +//! And what about a link to [just `Option`](Option) and, [with the generic, `Option<T>`](Option<T>)? +//! +//! We should also try linking to [`Result<T, E>`]; it has *two* generics! +//! +//! Now let's test a trickier case: [`Vec::<T>::new`], or you could write it +//! [with parentheses as `Vec::<T>::new()`][Vec::<T>::new()]. +//! And what about something even harder? That would be [`Vec::<Box<T>>::new()`]. +//! +//! This is also pretty tricky: [`TypeId::of::<String>()`]. +//! And this too: [`Vec::<std::error::Error>::len`]. +//! +//! We unofficially and implicitly support things that aren't valid in the actual Rust syntax, like +//! [`Box::<T>new()`]. We may not support them in the future! +//! +//! These will be resolved as regular links: +//! - [`this is <invalid syntax> first`](https://www.rust-lang.org) +//! - [`this is <invalid syntax> twice`] +//! - [`<invalid syntax> thrice`](https://www.rust-lang.org) +//! - [`<invalid syntax> four times`][rlo] +//! - [a < b][rlo] +//! - [c > d] +//! +//! [`this is <invalid syntax> twice`]: https://www.rust-lang.org +//! [rlo]: https://www.rust-lang.org +//! [c > d]: https://www.rust-lang.org + +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html"]' 'Vec<T>' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html"]' 'Box<Vec<Option<T>>>' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/iter/traits/iterator/trait.Iterator.html#associatedtype.Item"]' 'Iterator<Box<T>>::Item' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'just Option' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"]' 'with the generic, Option<T>' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/result/enum.Result.html"]' 'Result<T, E>' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<T>::new' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'with parentheses as Vec::<T>::new()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.new"]' 'Vec::<Box<T>>::new()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/core/any/struct.TypeId.html#method.of"]' 'TypeId::of::<String>()' +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html#method.len"]' 'Vec::<std::error::Error>::len' + +// @has foo/index.html '//a[@href="https://doc.rust-lang.org/nightly/alloc/boxed/struct.Box.html#method.new"]' 'Box::<T>new()' + +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> first' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'this is <invalid syntax> twice' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> thrice' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' '<invalid syntax> four times' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'a < b' +// @has foo/index.html '//a[@href="https://www.rust-lang.org"]' 'c > d' + +use std::any::TypeId; |
