diff options
| author | bors <bors@rust-lang.org> | 2021-04-14 04:53:24 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-04-14 04:53:24 +0000 |
| commit | 24921df7bd2cea06aba991909db7c9515799a40b (patch) | |
| tree | 0815acaa741d1f2103f89f27a02c3a50fd5174b1 | |
| parent | 19740d9334d1f4260a2851c3db7a7e70eb3d2ec3 (diff) | |
| parent | cbdebd97ec4846391dc0f9a1288a3ab1fc053f99 (diff) | |
| download | rust-24921df7bd2cea06aba991909db7c9515799a40b.tar.gz rust-24921df7bd2cea06aba991909db7c9515799a40b.zip | |
Auto merge of #7039 - phansch:melt-ice, r=flip1995
tabs_in_doc_comments: Fix ICE due to char indexing This is a quick-fix for an ICE in `tabs_in_doc_comments`. The problem was that we were indexing into possibly multi-byte characters, such as '位'. More specifically, `get_chunks_of_tabs` was returning indices into multi-byte characters. Those were passed on to a `Span` creation that then caused the ICE. This fix makes sure that we don't return indices that point inside a multi-byte character. *However*, we are still iterating over Unicode code points, not grapheme clusters. So a seemingly single character like y̆, which actually consists of two code points, will probably still cause incorrect spans in the output. But I don't think we handle those cases anywhere in Clippy currently? Fixes #5835 changelog: Fix ICE in `tabs_in_doc_comments`
| -rw-r--r-- | clippy_lints/src/tabs_in_doc_comments.rs | 31 | ||||
| -rw-r--r-- | tests/ui/crashes/ice-5835.rs | 9 | ||||
| -rw-r--r-- | tests/ui/crashes/ice-5835.stderr | 10 |
3 files changed, 39 insertions, 11 deletions
diff --git a/clippy_lints/src/tabs_in_doc_comments.rs b/clippy_lints/src/tabs_in_doc_comments.rs index a0492a88f91..e2c144709f5 100644 --- a/clippy_lints/src/tabs_in_doc_comments.rs +++ b/clippy_lints/src/tabs_in_doc_comments.rs @@ -104,30 +104,32 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> { // tracker to decide if the last group of tabs is not closed by a non-tab character let mut is_active = false; - let chars_array: Vec<_> = the_str.chars().collect(); + // Note that we specifically need the char _byte_ indices here, not the positional indexes + // within the char array to deal with multi-byte characters properly. `char_indices` does + // exactly that. It provides an iterator over tuples of the form `(byte position, char)`. + let char_indices: Vec<_> = the_str.char_indices().collect(); - if chars_array == vec!['\t'] { + if let [(_, '\t')] = char_indices.as_slice() { return vec![(0, 1)]; } - for (index, arr) in chars_array.windows(2).enumerate() { - let index = u32::try_from(index).expect(line_length_way_to_long); - match arr { - ['\t', '\t'] => { + for entry in char_indices.windows(2) { + match entry { + [(_, '\t'), (_, '\t')] => { // either string starts with double tab, then we have to set it active, // otherwise is_active is true anyway is_active = true; }, - [_, '\t'] => { + [(_, _), (index_b, '\t')] => { // as ['\t', '\t'] is excluded, this has to be a start of a tab group, // set indices accordingly is_active = true; - current_start = index + 1; + current_start = u32::try_from(*index_b).unwrap(); }, - ['\t', _] => { + [(_, '\t'), (index_b, _)] => { // this now has to be an end of the group, hence we have to push a new tuple is_active = false; - spans.push((current_start, index + 1)); + spans.push((current_start, u32::try_from(*index_b).unwrap())); }, _ => {}, } @@ -137,7 +139,7 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> { if is_active { spans.push(( current_start, - 
u32::try_from(the_str.chars().count()).expect(line_length_way_to_long), + u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long), )); } @@ -149,6 +151,13 @@ mod tests_for_get_chunks_of_tabs { use super::get_chunks_of_tabs; #[test] + fn test_unicode_han_string() { + let res = get_chunks_of_tabs(" \u{4f4d}\t"); + + assert_eq!(res, vec![(4, 5)]); + } + + #[test] fn test_empty_string() { let res = get_chunks_of_tabs(""); diff --git a/tests/ui/crashes/ice-5835.rs b/tests/ui/crashes/ice-5835.rs new file mode 100644 index 00000000000..5e99cb432b6 --- /dev/null +++ b/tests/ui/crashes/ice-5835.rs @@ -0,0 +1,9 @@ +#[rustfmt::skip] +pub struct Foo { + /// 位 + /// ^ Do not remove this tab character. + /// It was required to trigger the ICE. + pub bar: u8, +} + +fn main() {} diff --git a/tests/ui/crashes/ice-5835.stderr b/tests/ui/crashes/ice-5835.stderr new file mode 100644 index 00000000000..c972bcb60a0 --- /dev/null +++ b/tests/ui/crashes/ice-5835.stderr @@ -0,0 +1,10 @@ +error: using tabs in doc comments is not recommended + --> $DIR/ice-5835.rs:3:10 + | +LL | /// 位 + | ^^^^ help: consider using four spaces per tab + | + = note: `-D clippy::tabs-in-doc-comments` implied by `-D warnings` + +error: aborting due to previous error + |
