diff options
| author | binarycat <binarycat@envs.net> | 2025-05-27 16:52:48 -0500 | 
|---|---|---|
| committer | binarycat <binarycat@envs.net> | 2025-05-31 14:51:16 -0500 | 
| commit | a8b5e706b7ed81a294150d2a909ca4d6dc3daeb3 (patch) | |
| tree | c0ad03d7e7774b5190b1a68fcdd7ea7c09244b6d /compiler/rustc_resolve/src | |
| parent | 642e49bfed2481e54e252732be20d3c24cbec9e8 (diff) | |
| download | rust-a8b5e706b7ed81a294150d2a909ca4d6dc3daeb3.tar.gz rust-a8b5e706b7ed81a294150d2a909ca4d6dc3daeb3.zip | |
source_span_for_markdown_range: fix utf8 violation
It is non-trivial to reproduce this bug through rustdoc, which uses this function less than clippy does, so the regression test was added as a unit test instead of an integration test.
Diffstat (limited to 'compiler/rustc_resolve/src')
| -rw-r--r-- | compiler/rustc_resolve/src/rustdoc.rs | 24 | ||||
| -rw-r--r-- | compiler/rustc_resolve/src/rustdoc/tests.rs | 50 | 
2 files changed, 72 insertions, 2 deletions
| diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 01bb1324645..fa839d2748d 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -12,10 +12,14 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::unord::UnordSet; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; +use rustc_span::source_map::SourceMap; use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym}; use thin_vec::ThinVec; use tracing::{debug, trace}; +#[cfg(test)] +mod tests; + #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum DocFragmentKind { /// A doc fragment created from a `///` or `//!` doc comment. @@ -532,9 +536,19 @@ pub fn source_span_for_markdown_range( md_range: &Range<usize>, fragments: &[DocFragment], ) -> Option<Span> { + let map = tcx.sess.source_map(); + source_span_for_markdown_range_inner(map, markdown, md_range, fragments) +} + +// inner function used for unit testing +pub fn source_span_for_markdown_range_inner( + map: &SourceMap, + markdown: &str, + md_range: &Range<usize>, + fragments: &[DocFragment], +) -> Option<Span> { use rustc_span::BytePos; - let map = tcx.sess.source_map(); if let &[fragment] = &fragments && fragment.kind == DocFragmentKind::RawDoc && let Ok(snippet) = map.span_to_snippet(fragment.span) @@ -570,7 +584,13 @@ pub fn source_span_for_markdown_range( { // If there is either a match in a previous fragment, or // multiple matches in this fragment, there is ambiguity. - if match_data.is_none() && !snippet[match_start + 1..].contains(pat) { + // the snippet cannot be zero-sized, because it matches + // the pattern, which is checked to not be zero sized. + if match_data.is_none() + && !snippet.as_bytes()[match_start + 1..] + .windows(pat.len()) + .any(|s| s == pat.as_bytes()) + { match_data = Some((i, match_start)); } else { // Heirustic produced ambiguity, return nothing. 
diff --git a/compiler/rustc_resolve/src/rustdoc/tests.rs b/compiler/rustc_resolve/src/rustdoc/tests.rs new file mode 100644 index 00000000000..221ac907e7c --- /dev/null +++ b/compiler/rustc_resolve/src/rustdoc/tests.rs @@ -0,0 +1,50 @@ +use std::path::PathBuf; + +use rustc_span::source_map::{FilePathMapping, SourceMap}; +use rustc_span::symbol::sym; +use rustc_span::{BytePos, Span}; + +use super::{DocFragment, DocFragmentKind, source_span_for_markdown_range_inner}; + +#[test] +fn single_backtick() { + let sm = SourceMap::new(FilePathMapping::empty()); + sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "`"] fn foo() {}"#.to_string()); + let span = source_span_for_markdown_range_inner( + &sm, + "`", + &(0..1), + &[DocFragment { + span: Span::with_root_ctxt(BytePos(8), BytePos(11)), + item_id: None, + kind: DocFragmentKind::RawDoc, + doc: sym::empty, // unused placeholder + indent: 0, + }], + ) + .unwrap(); + assert_eq!(span.lo(), BytePos(9)); + assert_eq!(span.hi(), BytePos(10)); +} + +#[test] +fn utf8() { + // regression test for https://github.com/rust-lang/rust/issues/141665 + let sm = SourceMap::new(FilePathMapping::empty()); + sm.new_source_file(PathBuf::from("foo.rs").into(), r#"#[doc = "⚠"] fn foo() {}"#.to_string()); + let span = source_span_for_markdown_range_inner( + &sm, + "⚠", + &(0..3), + &[DocFragment { + span: Span::with_root_ctxt(BytePos(8), BytePos(14)), + item_id: None, + kind: DocFragmentKind::RawDoc, + doc: sym::empty, // unused placeholder + indent: 0, + }], + ) + .unwrap(); + assert_eq!(span.lo(), BytePos(9)); + assert_eq!(span.hi(), BytePos(12)); +} | 
