From a8b5e706b7ed81a294150d2a909ca4d6dc3daeb3 Mon Sep 17 00:00:00 2001 From: binarycat Date: Tue, 27 May 2025 16:52:48 -0500 Subject: source_span_for_markdown_range: fix utf8 violation it is non-trivial to reproduce this bug through rustdoc, which uses this function less than clippy, so the regression test was added as a unit test instead of an integration test. --- compiler/rustc_resolve/src/rustdoc.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'compiler/rustc_resolve/src/rustdoc.rs') diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 01bb1324645..fa839d2748d 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -12,10 +12,14 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::unord::UnordSet; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; +use rustc_span::source_map::SourceMap; use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym}; use thin_vec::ThinVec; use tracing::{debug, trace}; +#[cfg(test)] +mod tests; + #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum DocFragmentKind { /// A doc fragment created from a `///` or `//!` doc comment. @@ -531,10 +535,20 @@ pub fn source_span_for_markdown_range( markdown: &str, md_range: &Range, fragments: &[DocFragment], +) -> Option { + let map = tcx.sess.source_map(); + source_span_for_markdown_range_inner(map, markdown, md_range, fragments) +} + +// inner function used for unit testing +pub fn source_span_for_markdown_range_inner( + map: &SourceMap, + markdown: &str, + md_range: &Range, + fragments: &[DocFragment], ) -> Option { use rustc_span::BytePos; - let map = tcx.sess.source_map(); if let &[fragment] = &fragments && fragment.kind == DocFragmentKind::RawDoc && let Ok(snippet) = map.span_to_snippet(fragment.span) @@ -570,7 +584,13 @@ pub fn source_span_for_markdown_range( { // If there is either a match in a previous fragment, or // multiple matches in this fragment, there is ambiguity. - if match_data.is_none() && !snippet[match_start + 1..].contains(pat) { + // the snippet cannot be zero-sized, because it matches + // the pattern, which is checked to not be zero sized. + if match_data.is_none() + && !snippet.as_bytes()[match_start + 1..] + .windows(pat.len()) + .any(|s| s == pat.as_bytes()) + { match_data = Some((i, match_start)); } else { // Heirustic produced ambiguity, return nothing. -- cgit 1.4.1-3-g733a5