about summary refs log tree commit diff
path: root/library
diff options
context:
space:
mode:
authorMatthias Krüger <476013+matthiaskrgr@users.noreply.github.com>2025-07-28 08:36:53 +0200
committerGitHub <noreply@github.com>2025-07-28 08:36:53 +0200
commite36b844b4e433e1e8a208f6be18934b9ae8b3b71 (patch)
treebecd65ec26c85f294927f9425cf6fb56dafd2dfc /library
parent21120e297c3d2db6a1f28e1a8798777a52f4fee2 (diff)
parent7f7d343400de843c12f880b02ea1a7b22ccc7379 (diff)
downloadrust-e36b844b4e433e1e8a208f6be18934b9ae8b3b71.tar.gz
rust-e36b844b4e433e1e8a208f6be18934b9ae8b3b71.zip
Rollup merge of #144472 - okaneco:char_bound, r=Mark-Simulacrum
str: Mark unstable `round_char_boundary` feature functions as const

Mark `floor_char_boundary`, `ceil_char_boundary` const
Simplify the implementations, reducing the number of arithmetic operations

It seems unnecessary to do the lower/upper bounds calculations and extra slicing when we can jump straight to inspecting the bytes, assuming the underlying data is valid UTF-8.

Tracking issue https://github.com/rust-lang/rust/issues/93743
Diffstat (limited to 'library')
-rw-r--r--library/core/src/str/mod.rs40
1 files changed, 26 insertions, 14 deletions
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 029abf17539..c40af4de7e0 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -407,17 +407,22 @@ impl str {
     /// ```
     #[unstable(feature = "round_char_boundary", issue = "93743")]
     #[inline]
-    pub fn floor_char_boundary(&self, index: usize) -> usize {
+    pub const fn floor_char_boundary(&self, index: usize) -> usize {
         if index >= self.len() {
             self.len()
         } else {
-            let lower_bound = index.saturating_sub(3);
-            let new_index = self.as_bytes()[lower_bound..=index]
-                .iter()
-                .rposition(|b| b.is_utf8_char_boundary());
-
-            // SAFETY: we know that the character boundary will be within four bytes
-            unsafe { lower_bound + new_index.unwrap_unchecked() }
+            let mut i = index;
+            while i > 0 {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i -= 1;
+            }
+
+            //  The character boundary will be within four bytes of the index
+            debug_assert!(i >= index.saturating_sub(3));
+
+            i
         }
     }
 
@@ -445,15 +450,22 @@ impl str {
     /// ```
     #[unstable(feature = "round_char_boundary", issue = "93743")]
     #[inline]
-    pub fn ceil_char_boundary(&self, index: usize) -> usize {
+    pub const fn ceil_char_boundary(&self, index: usize) -> usize {
         if index >= self.len() {
             self.len()
         } else {
-            let upper_bound = Ord::min(index + 4, self.len());
-            self.as_bytes()[index..upper_bound]
-                .iter()
-                .position(|b| b.is_utf8_char_boundary())
-                .map_or(upper_bound, |pos| pos + index)
+            let mut i = index;
+            while i < self.len() {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i += 1;
+            }
+
+            //  The character boundary will be within four bytes of the index
+            debug_assert!(i <= index + 3);
+
+            i
         }
     }