From 4c2959fb12a6bd083003ec4371126211402e265d Mon Sep 17 00:00:00 2001
From: Matthias Krüger <matthias.krueger@famsik.de>
Date: Sun, 18 Jul 2021 10:44:39 +0200
Subject: fix a bunch of clippy warnings

clippy::bind_instead_of_map
clippy::branches_sharing_code
clippy::collapsible_match
clippy::inconsistent_struct_constructor
clippy::int_plus_one
clippy::iter_count
clippy::iter_nth_zero
clippy::manual_range_contains
clippy::match_like_matches_macro
clippy::needless_collect
clippy::needless_question_mark
clippy::needless_return
clippy::op_ref
clippy::option_as_ref_deref
clippy::ptr_arg
clippy::redundant_clone
clippy::redundant_closure
clippy::redundant_static_lifetimes
clippy::search_is_some
clippy::single_char_add_str
clippy::single_char_pattern
clippy::single_component_path_imports
clippy::single_match
clippy::skip_while_next
clippy::unnecessary_lazy_evaluations
clippy::unnecessary_unwrap
clippy::useless_conversion
clippy::useless_format
---
 src/string.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/string.rs')
diff --git a/src/string.rs b/src/string.rs
index 080c4f17788..0cb9d817ca2 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -57,7 +57,7 @@ impl<'a> StringFormat<'a> {
     /// This allows to fit more graphemes from the string on a line when
     /// SnippetState::EndWithLineFeed.
     fn max_width_without_indent(&self) -> Option<usize> {
-        Some(self.config.max_width().checked_sub(self.line_end.len())?)
+        self.config.max_width().checked_sub(self.line_end.len())
     }
 }
 
@@ -99,7 +99,7 @@ pub(crate) fn rewrite_string<'a>(
                 if is_new_line(grapheme) {
                     // take care of blank lines
                     result = trim_end_but_line_feed(fmt.trim_end, result);
-                    result.push_str("\n");
+                    result.push('\n');
                     if !is_bareline_ok && cur_start + i + 1 < graphemes.len() {
                         result.push_str(&indent_without_newline);
                         result.push_str(fmt.line_start);
-- 
cgit 1.4.1-3-g733a5


From 368a9b7cef25c22c3e836453e73d8584b251b578 Mon Sep 17 00:00:00 2001
From: Stéphane Campinas <stephane.campinas@gmail.com>
Date: Wed, 2 Feb 2022 02:06:14 +0100
Subject: Handle non-ascii character at boundary (#5089)

* Handle non-ascii character at boundary

* Replace subtraction underflow check with early termination
---
 src/string.rs              |  5 ++++-
 tests/source/issue-5023.rs | 22 ++++++++++++++++++++++
 tests/target/issue-5023.rs | 23 +++++++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 tests/source/issue-5023.rs
 create mode 100644 tests/target/issue-5023.rs

(limited to 'src/string.rs')

diff --git a/src/string.rs b/src/string.rs
index 64ae15672df..b65aa5b33b2 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -278,6 +278,9 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]
         }
         cur_index
     };
+    if max_width_index_in_input == 0 {
+        return SnippetState::EndOfInput(input.concat());
+    }
 
     // Find the position in input for breaking the string
     if line_end.is_empty()
@@ -301,7 +304,7 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]
         return if trim_end {
             SnippetState::LineEnd(input[..=url_index_end].concat(), index_plus_ws + 1)
         } else {
-            return SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1);
+            SnippetState::LineEnd(input[..=index_plus_ws].concat(), index_plus_ws + 1)
         };
     }
 
diff --git a/tests/source/issue-5023.rs b/tests/source/issue-5023.rs
new file mode 100644
index 00000000000..ae1c723eff7
--- /dev/null
+++ b/tests/source/issue-5023.rs
@@ -0,0 +1,22 @@
+// rustfmt-wrap_comments: true
+
+/// A comment to test special unicode characters on boundaries
+/// 是，是，是，是，是，是，是，是，是，是，是，是  it should break right here this goes to the next line
+fn main() {
+    if xxx {
+        let xxx = xxx
+            .into_iter()
+            .filter(|(xxx, xxx)| {
+                if let Some(x) = Some(1) {
+                    // xxxxxxxxxxxxxxxxxx, xxxxxxxxxxxx, xxxxxxxxxxxxxxxxxxxx xxx xxxxxxx, xxxxx xxx
+                    // xxxxxxxxxx. xxxxxxxxxxxxxxxx，xxxxxxxxxxxxxxxxx xxx xxxxxxx
+                    // 是sdfadsdfxxxxxxxxx，sdfaxxxxxx_xxxxx_masdfaonxxx，
+                    if false {
+                        return true;
+                    }
+                }
+                false
+            })
+            .collect();
+    }
+}
diff --git a/tests/target/issue-5023.rs b/tests/target/issue-5023.rs
new file mode 100644
index 00000000000..4e84c7d9842
--- /dev/null
+++ b/tests/target/issue-5023.rs
@@ -0,0 +1,23 @@
+// rustfmt-wrap_comments: true
+
+/// A comment to test special unicode characters on boundaries
+/// 是，是，是，是，是，是，是，是，是，是，是，是  it should break right here
+/// this goes to the next line
+fn main() {
+    if xxx {
+        let xxx = xxx
+            .into_iter()
+            .filter(|(xxx, xxx)| {
+                if let Some(x) = Some(1) {
+                    // xxxxxxxxxxxxxxxxxx, xxxxxxxxxxxx, xxxxxxxxxxxxxxxxxxxx xxx xxxxxxx, xxxxx xxx
+                    // xxxxxxxxxx. xxxxxxxxxxxxxxxx，xxxxxxxxxxxxxxxxx xxx xxxxxxx
+                    // 是sdfadsdfxxxxxxxxx，sdfaxxxxxx_xxxxx_masdfaonxxx，
+                    if false {
+                        return true;
+                    }
+                }
+                false
+            })
+            .collect();
+    }
+}
-- 
cgit 1.4.1-3-g733a5


From c65ba14d692bedab306b0426c36ab8f4fe4cbab2 Mon Sep 17 00:00:00 2001
From: Pascal Seitz <pascal.seitz@gmail.com>
Date: Sun, 13 Mar 2022 10:03:50 +0800
Subject: Fixes #5260

Fixes #5260 by not treating a punctuation character as a valid line-break
position when it is part of a type path separator ('::').
---
 src/comment.rs             |  2 +-
 src/string.rs              | 45 ++++++++++++++++++++++++++++++++++++++-------
 tests/source/issue-5260.rs | 14 ++++++++++++++
 tests/target/issue-5260.rs | 13 +++++++++++++
 4 files changed, 66 insertions(+), 8 deletions(-)
 create mode 100644 tests/source/issue-5260.rs
 create mode 100644 tests/target/issue-5260.rs

(limited to 'src/string.rs')

diff --git a/src/comment.rs b/src/comment.rs
index f9d8a0fa70c..eb195b1f762 100644
--- a/src/comment.rs
+++ b/src/comment.rs
@@ -796,7 +796,7 @@ impl<'a> CommentRewrite<'a> {
         // 1) wrap_comments = true is configured
         // 2) The comment is not the start of a markdown header doc comment
         // 3) The comment width exceeds the shape's width
-        // 4) No URLS were found in the commnet
+        // 4) No URLS were found in the comment
         let should_wrap_comment = self.fmt.config.wrap_comments()
             && !is_markdown_header_doc_comment
             && unicode_str_width(line) > self.fmt.shape.width
diff --git a/src/string.rs b/src/string.rs
index b65aa5b33b2..78b72a50cb2 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -315,20 +315,21 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]
         // Found a whitespace and what is on its left side is big enough.
         Some(index) if index >= MIN_STRING => break_at(index),
         // No whitespace found, try looking for a punctuation instead
-        _ => match input[0..max_width_index_in_input]
-            .iter()
-            .rposition(|grapheme| is_punctuation(grapheme))
+        _ => match (0..max_width_index_in_input)
+            .rev()
+            .skip_while(|pos| !is_valid_linebreak(input, *pos))
+            .next()
         {
             // Found a punctuation and what is on its left side is big enough.
             Some(index) if index >= MIN_STRING => break_at(index),
             // Either no boundary character was found to the left of `input[max_chars]`, or the line
             // got too small. We try searching for a boundary character to the right.
-            _ => match input[max_width_index_in_input..]
-                .iter()
-                .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme))
+            _ => match (max_width_index_in_input..input.len())
+                .skip_while(|pos| !is_valid_linebreak(input, *pos))
+                .next()
             {
                 // A boundary was found after the line limit
-                Some(index) => break_at(max_width_index_in_input + index),
+                Some(index) => break_at(index),
                 // No boundary to the right, the input cannot be broken
                 None => SnippetState::EndOfInput(input.concat()),
             },
@@ -336,6 +337,23 @@ fn break_string(max_width: usize, trim_end: bool, line_end: &str, input: &[&str]
     }
 }
 
+fn is_valid_linebreak(input: &[&str], pos: usize) -> bool {
+    let is_whitespace = is_whitespace(input[pos]);
+    if is_whitespace {
+        return true;
+    }
+    let is_punctuation = is_punctuation(input[pos]);
+    if is_punctuation && !is_part_of_type(input, pos) {
+        return true;
+    }
+    false
+}
+
+fn is_part_of_type(input: &[&str], pos: usize) -> bool {
+    input.get(pos..=pos + 1) == Some(&[":", ":"])
+        || input.get(pos.saturating_sub(1)..=pos) == Some(&[":", ":"])
+}
+
 fn is_new_line(grapheme: &str) -> bool {
     let bytes = grapheme.as_bytes();
     bytes.starts_with(b"\n") || bytes.starts_with(b"\r\n")
@@ -369,6 +387,19 @@ mod test {
         rewrite_string("eq_", &fmt, 2);
     }
 
+    #[test]
+    fn line_break_at_valid_points_test() {
+        let string = "[TheName](Dont::break::my::type::That::would::be::very::nice) break here";
+        let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::<Vec<&str>>();
+        assert_eq!(
+            break_string(20, false, "", &graphemes[..]),
+            SnippetState::LineEnd(
+                "[TheName](Dont::break::my::type::That::would::be::very::nice) ".to_string(),
+                62
+            )
+        );
+    }
+
     #[test]
     fn should_break_on_whitespace() {
         let string = "Placerat felis. Mauris porta ante sagittis purus.";
diff --git a/tests/source/issue-5260.rs b/tests/source/issue-5260.rs
new file mode 100644
index 00000000000..c0606817290
--- /dev/null
+++ b/tests/source/issue-5260.rs
@@ -0,0 +1,14 @@
+// rustfmt-wrap_comments: true
+
+/// [MyType](VeryLongPathToMyType::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks)
+fn documented_with_longtype() {
+    // # We're using a long type link, rustfmt should not break line
+    // on the type when `wrap_comments = true`
+}
+
+/// VeryLongPathToMyType::JustMyType::But::VeryVery::Long::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks
+fn documented_with_verylongtype() {
+    // # We're using a long type link, rustfmt should not break line
+    // on the type when `wrap_comments = true`
+}
+
diff --git a/tests/target/issue-5260.rs b/tests/target/issue-5260.rs
new file mode 100644
index 00000000000..171f6fa51b7
--- /dev/null
+++ b/tests/target/issue-5260.rs
@@ -0,0 +1,13 @@
+// rustfmt-wrap_comments: true
+
+/// [MyType](VeryLongPathToMyType::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks)
+fn documented_with_longtype() {
+    // # We're using a long type link, rustfmt should not break line
+    // on the type when `wrap_comments = true`
+}
+
+/// VeryLongPathToMyType::JustMyType::But::VeryVery::Long::NoLineBreak::Here::Okay::ThatWouldBeNice::Thanks
+fn documented_with_verylongtype() {
+    // # We're using a long type link, rustfmt should not break line
+    // on the type when `wrap_comments = true`
+}
-- 
cgit 1.4.1-3-g733a5


From 9f58224123f8185edca08035b74d741fbc0620fb Mon Sep 17 00:00:00 2001
From: Charles Lew <crlf0710@gmail.com>
Date: Sat, 29 Jul 2023 09:10:09 +0800
Subject: Update Unicode data to 15.0

---
 Cargo.lock    | 20 ++++++++++----------
 Cargo.toml    |  2 +-
 src/string.rs |  4 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'src/string.rs')

diff --git a/Cargo.lock b/Cargo.lock
index e867f2cb840..bcac61ef342 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -588,9 +588,9 @@ dependencies = [
  "toml",
  "tracing",
  "tracing-subscriber",
+ "unicode-properties",
  "unicode-segmentation",
  "unicode-width",
- "unicode_categories",
 ]
 
 [[package]]
@@ -852,22 +852,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
 
 [[package]]
-name = "unicode-segmentation"
-version = "1.9.0"
+name = "unicode-properties"
+version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
+checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0"
 
 [[package]]
-name = "unicode-width"
-version = "0.1.9"
+name = "unicode-segmentation"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
+checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
 
 [[package]]
-name = "unicode_categories"
-version = "0.1.1"
+name = "unicode-width"
+version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
 
 [[package]]
 name = "utf8parse"
diff --git a/Cargo.toml b/Cargo.toml
index f4c4bab37cb..5a800e95a0d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,7 +54,7 @@ tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
 unicode-segmentation = "1.9"
 unicode-width = "0.1"
-unicode_categories = "0.1"
+unicode-properties = { version = "0.1", default-features = false, features = ["general-category"] }
 
 rustfmt-config_proc_macro = { version = "0.3", path = "config_proc_macro" }
 
diff --git a/src/string.rs b/src/string.rs
index 78b72a50cb2..cb666fff695 100644
--- a/src/string.rs
+++ b/src/string.rs
@@ -1,7 +1,7 @@
 // Format string literals.
 
 use regex::Regex;
-use unicode_categories::UnicodeCategories;
+use unicode_properties::{GeneralCategory, UnicodeGeneralCategory};
 use unicode_segmentation::UnicodeSegmentation;
 
 use crate::config::Config;
@@ -366,7 +366,7 @@ fn is_whitespace(grapheme: &str) -> bool {
 fn is_punctuation(grapheme: &str) -> bool {
     grapheme
         .chars()
-        .all(UnicodeCategories::is_punctuation_other)
+        .all(|c| c.general_category() == GeneralCategory::OtherPunctuation)
 }
 
 fn graphemes_width(graphemes: &[&str]) -> usize {
-- 
cgit 1.4.1-3-g733a5