From 4c2959fb12a6bd083003ec4371126211402e265d Mon Sep 17 00:00:00 2001 From: Matthias Krüger Date: Sun, 18 Jul 2021 10:44:39 +0200 Subject: fix a bunch of clippy warnings clippy::bind_instead_of_map clippy::branches_sharing_code clippy::collapsible_match clippy::inconsistent_struct_constructor clippy::int_plus_one clippy::iter_count clippy::iter_nth_zero clippy::manual_range_contains clippy::match_like_matches_macro clippy::needless::collect clippy::needless_question_mark clippy::needless_return clippy::op_ref clippy::option_as_ref_deref clippy::ptr_arg clippy::redundant_clone clippy::redundant_closure clippy::redundant_static_lifetimes clippy::search_is_some clippy::#single_char_add_str clippy::single_char_pattern clippy::single_component_path_imports clippy::single_match clippy::skip_while_next clippy::unnecessary_lazy_evaluations clippy::unnecessary_unwrap clippy::useless_conversion clippy::useless_format --- src/string.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/string.rs') diff --git a/src/string.rs b/src/string.rs index 080c4f17788..0cb9d817ca2 100644 --- a/src/string.rs +++ b/src/string.rs @@ -57,7 +57,7 @@ impl<'a> StringFormat<'a> { /// This allows to fit more graphemes from the string on a line when /// SnippetState::EndWithLineFeed. fn max_width_without_indent(&self) -> Option<usize> { - Some(self.config.max_width().checked_sub(self.line_end.len())?) 
+ self.config.max_width().checked_sub(self.line_end.len()) } } @@ -99,7 +99,7 @@ pub(crate) fn rewrite_string<'a>( if is_new_line(grapheme) { // take care of blank lines result = trim_end_but_line_feed(fmt.trim_end, result); - result.push_str("\n"); + result.push('\n'); if !is_bareline_ok && cur_start + i + 1 < graphemes.len() { result.push_str(&indent_without_newline); result.push_str(fmt.line_start); -- cgit 1.4.1-3-g733a5 From 368a9b7cef25c22c3e836453e73d8584b251b578 Mon Sep 17 00:00:00 2001 From: Stéphane Campinas Date: Wed, 2 Feb 2022 02:06:14 +0100 Subject: Handle non-ascii character at boundary (#5089) * Handle non-ascii character at boundary * Replace substraction underflow check with early termination --- src/string.rs | 5 ++++- tests/source/issue-5023.rs | 22 ++++++++++++++++++++++ tests/target/issue-5023.rs | 23 +++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 tests/source/issue-5023.rs create mode 100644 tests/target/issue-5023.rs (limited to 'src/string.rs') diff --git a/src/string.rs b/src/string.rs index 64ae15672df..b65aa5b33b2 100644 --- a/src/string.rs +++ b/src/string.rs @@ -278,6 +278,9 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] } cur_index }; + if max_width_index_in_input == 0 { + return SnippetState::EndOfInput(input.concat()); + } // Find the position in input for breaking the string if line_end.is_empty() @@ -301,7 +304,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] return if trim_end { SnippetState::LineEnd(input[..=url_index_end].concat(), index_plus_ws + 1) } else { - return SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1); + SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1) }; } diff --git a/tests/source/issue-5023.rs b/tests/source/issue-5023.rs new file mode 100644 index 00000000000..ae1c723eff7 --- /dev/null +++ b/tests/source/issue-5023.rs @@ -0,0 +1,22 @@ +// 
rustfmt-wrap_comments: true + +/// A comment to test special unicode characters on boundaries +/// 是,是,是,是,是,是,是,是,是,是,是,是 it should break right here this goes to the next line +fn main() { + if xxx { + let xxx = xxx + .into_iter() + .filter(|(xxx, xxx)| { + if let Some(x) = Some(1) { + // xxxxxxxxxxxxxxxxxx, xxxxxxxxxxxx, xxxxxxxxxxxxxxxxxxxx xxx xxxxxxx, xxxxx xxx + // xxxxxxxxxx. xxxxxxxxxxxxxxxx,xxxxxxxxxxxxxxxxx xxx xxxxxxx + // 是sdfadsdfxxxxxxxxx,sdfaxxxxxx_xxxxx_masdfaonxxx, + if false { + return true; + } + } + false + }) + .collect(); + } +} diff --git a/tests/target/issue-5023.rs b/tests/target/issue-5023.rs new file mode 100644 index 00000000000..4e84c7d9842 --- /dev/null +++ b/tests/target/issue-5023.rs @@ -0,0 +1,23 @@ +// rustfmt-wrap_comments: true + +/// A comment to test special unicode characters on boundaries +/// 是,是,是,是,是,是,是,是,是,是,是,是 it should break right here +/// this goes to the next line +fn main() { + if xxx { + let xxx = xxx + .into_iter() + .filter(|(xxx, xxx)| { + if let Some(x) = Some(1) { + // xxxxxxxxxxxxxxxxxx, xxxxxxxxxxxx, xxxxxxxxxxxxxxxxxxxx xxx xxxxxxx, xxxxx xxx + // xxxxxxxxxx. 
xxxxxxxxxxxxxxxx,xxxxxxxxxxxxxxxxx xxx xxxxxxx + // 是sdfadsdfxxxxxxxxx,sdfaxxxxxx_xxxxx_masdfaonxxx, + if false { + return true; + } + } + false + }) + .collect(); + } +} -- cgit 1.4.1-3-g733a5 From c65ba14d692bedab306b0426c36ab8f4fe4cbab2 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Sun, 13 Mar 2022 10:03:50 +0800 Subject: Fixes #5260 Fixes #5260 by checking if it is part of a type '::' --- src/comment.rs | 2 +- src/string.rs | 45 ++++++++++++++++++++++++++++++++++++++------- tests/source/issue-5260.rs | 14 ++++++++++++++ tests/target/issue-5260.rs | 13 +++++++++++++ 4 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 tests/source/issue-5260.rs create mode 100644 tests/target/issue-5260.rs (limited to 'src/string.rs') diff --git a/src/comment.rs b/src/comment.rs index f9d8a0fa70c..eb195b1f762 100644 --- a/src/comment.rs +++ b/src/comment.rs @@ -796,7 +796,7 @@ impl<'a> CommentRewrite<'a> { // 1) wrap_comments = true is configured // 2) The comment is not the start of a markdown header doc comment // 3) The comment width exceeds the shape's width - // 4) No URLS were found in the commnet + // 4) No URLS were found in the comment let should_wrap_comment = self.fmt.config.wrap_comments() && !is_markdown_header_doc_comment && unicode_str_width(line) > self.fmt.shape.width diff --git a/src/string.rs b/src/string.rs index b65aa5b33b2..78b72a50cb2 100644 --- a/src/string.rs +++ b/src/string.rs @@ -315,20 +315,21 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] // Found a whitespace and what is on its left side is big enough. 
Some(index) if index >= MIN_STRING => break_at(index), // No whitespace found, try looking for a punctuation instead - _ => match input[0..max_width_index_in_input] - .iter() - .rposition(|grapheme| is_punctuation(grapheme)) + _ => match (0..max_width_index_in_input) + .rev() + .skip_while(|pos| !is_valid_linebreak(input, *pos)) + .next() { // Found a punctuation and what is on its left side is big enough. Some(index) if index >= MIN_STRING => break_at(index), // Either no boundary character was found to the left of `input[max_chars]`, or the line // got too small. We try searching for a boundary character to the right. - _ => match input[max_width_index_in_input..] - .iter() - .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme)) + _ => match (max_width_index_in_input..input.len()) + .skip_while(|pos| !is_valid_linebreak(input, *pos)) + .next() { // A boundary was found after the line limit - Some(index) => break_at(max_width_index_in_input + index), + Some(index) => break_at(index), // No boundary to the right, the input cannot be broken None => SnippetState::EndOfInput(input.concat()), }, @@ -336,6 +337,23 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str] } } +fn is_valid_linebreak(input: &[&str], pos: usize) -> bool { + let is_whitespace = is_whitespace(input[pos]); + if is_whitespace { + return true; + } + let is_punctuation = is_punctuation(input[pos]); + if is_punctuation && !is_part_of_type(input, pos) { + return true; + } + false +} + +fn is_part_of_type(input: &[&str], pos: usize) -> bool { + input.get(pos..=pos + 1) == Some(&[":", ":"]) + || input.get(pos.saturating_sub(1)..=pos) == Some(&[":", ":"]) +} + fn is_new_line(grapheme: &str) -> bool { let bytes = grapheme.as_bytes(); bytes.starts_with(b"\n") || bytes.starts_with(b"\r\n") @@ -369,6 +387,19 @@ mod test { rewrite_string("eq_", &fmt, 2); } + #[test] + fn line_break_at_valid_points_test() { + let string = 
"[TheName](Dont::break::my::type::That::would::be::very::nice) break here"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::<Vec<&str>>(); + assert_eq!( + break_string(20, false, "", &graphemes[..]), + SnippetState::LineEnd( + "[TheName](Dont::break::my::type::That::would::be::very::nice) ".to_string(), + 62 + ) + ); + } + #[test] fn should_break_on_whitespace() { let string = "Placerat felis. Mauris porta ante sagittis purus."; diff --git a/tests/source/issue-5260.rs b/tests/source/issue-5260.rs new file mode 100644 index 00000000000..c0606817290 --- /dev/null +++ b/tests/source/issue-5260.rs @@ -0,0 +1,14 @@ +// rustfmt-wrap_comments: true + +/// [MyType](VeryLongPathToMyType::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks) +fn documented_with_longtype() { + // # We're using a long type link, rustfmt should not break line + // on the type when `wrap_comments = true` +} + +/// VeryLongPathToMyType::JustMyType::But::VeryVery::Long::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks +fn documented_with_verylongtype() { + // # We're using a long type link, rustfmt should not break line + // on the type when `wrap_comments = true` +} + diff --git a/tests/target/issue-5260.rs b/tests/target/issue-5260.rs new file mode 100644 index 00000000000..171f6fa51b7 --- /dev/null +++ b/tests/target/issue-5260.rs @@ -0,0 +1,13 @@ +// rustfmt-wrap_comments: true + +/// [MyType](VeryLongPathToMyType::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks) +fn documented_with_longtype() { + // # We're using a long type link, rustfmt should not break line + // on the type when `wrap_comments = true` +} + +/// VeryLongPathToMyType::JustMyType::But::VeryVery::Long::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks +fn documented_with_verylongtype() { + // # We're using a long type link, rustfmt should not break line + // on the type when `wrap_comments = true` +} -- cgit 1.4.1-3-g733a5 From 9f58224123f8185edca08035b74d741fbc0620fb Mon Sep 17 00:00:00 2001 From: Charles Lew 
Date: Sat, 29 Jul 2023 09:10:09 +0800 Subject: Update Unicode data to 15.0 --- Cargo.lock | 20 ++++++++++---------- Cargo.toml | 2 +- src/string.rs | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/string.rs') diff --git a/Cargo.lock b/Cargo.lock index e867f2cb840..bcac61ef342 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -588,9 +588,9 @@ dependencies = [ "toml", "tracing", "tracing-subscriber", + "unicode-properties", "unicode-segmentation", "unicode-width", - "unicode_categories", ] [[package]] @@ -852,22 +852,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] -name = "unicode-segmentation" -version = "1.9.0" +name = "unicode-properties" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" +checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0" [[package]] -name = "unicode-width" -version = "0.1.9" +name = "unicode-segmentation" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] -name = "unicode_categories" -version = "0.1.1" +name = "unicode-width" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "utf8parse" diff --git a/Cargo.toml b/Cargo.toml index f4c4bab37cb..5a800e95a0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ tracing = "0.1.37" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } unicode-segmentation = "1.9" unicode-width = "0.1" -unicode_categories 
= "0.1" +unicode-properties = { version = "0.1", default-features = false, features = ["general-category"] } rustfmt-config_proc_macro = { version = "0.3", path = "config_proc_macro" } diff --git a/src/string.rs b/src/string.rs index 78b72a50cb2..cb666fff695 100644 --- a/src/string.rs +++ b/src/string.rs @@ -1,7 +1,7 @@ // Format string literals. use regex::Regex; -use unicode_categories::UnicodeCategories; +use unicode_properties::{GeneralCategory, UnicodeGeneralCategory}; use unicode_segmentation::UnicodeSegmentation; use crate::config::Config; @@ -366,7 +366,7 @@ fn is_whitespace(grapheme: &str) -> bool { fn is_punctuation(grapheme: &str) -> bool { grapheme .chars() - .all(UnicodeCategories::is_punctuation_other) + .all(|c| c.general_category() == GeneralCategory::OtherPunctuation) } fn graphemes_width(graphemes: &[&str]) -> usize { -- cgit 1.4.1-3-g733a5