diff options
| author | Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> | 2025-07-28 08:36:53 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-28 08:36:53 +0200 |
| commit | e36b844b4e433e1e8a208f6be18934b9ae8b3b71 (patch) | |
| tree | becd65ec26c85f294927f9425cf6fb56dafd2dfc /library | |
| parent | 21120e297c3d2db6a1f28e1a8798777a52f4fee2 (diff) | |
| parent | 7f7d343400de843c12f880b02ea1a7b22ccc7379 (diff) | |
| download | rust-e36b844b4e433e1e8a208f6be18934b9ae8b3b71.tar.gz rust-e36b844b4e433e1e8a208f6be18934b9ae8b3b71.zip | |
Rollup merge of #144472 - okaneco:char_bound, r=Mark-Simulacrum
str: Mark unstable `round_char_boundary` feature functions as const

Mark `floor_char_boundary`, `ceil_char_boundary` const
Simplify the implementations, reducing the number of arithmetic operations

It seems unnecessary to do the lower/upper bounds calculations and extra slicing when we can jump straight to inspecting the bytes, assuming the underlying data is valid UTF-8.

Tracking issue https://github.com/rust-lang/rust/issues/93743
Diffstat (limited to 'library')
| -rw-r--r-- | library/core/src/str/mod.rs | 40 |
1 files changed, 26 insertions, 14 deletions
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 029abf17539..c40af4de7e0 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -407,17 +407,22 @@ impl str {
     /// ```
     #[unstable(feature = "round_char_boundary", issue = "93743")]
     #[inline]
-    pub fn floor_char_boundary(&self, index: usize) -> usize {
+    pub const fn floor_char_boundary(&self, index: usize) -> usize {
         if index >= self.len() {
             self.len()
         } else {
-            let lower_bound = index.saturating_sub(3);
-            let new_index = self.as_bytes()[lower_bound..=index]
-                .iter()
-                .rposition(|b| b.is_utf8_char_boundary());
-
-            // SAFETY: we know that the character boundary will be within four bytes
-            unsafe { lower_bound + new_index.unwrap_unchecked() }
+            let mut i = index;
+            while i > 0 {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i -= 1;
+            }
+
+            // The character boundary will be within four bytes of the index
+            debug_assert!(i >= index.saturating_sub(3));
+
+            i
         }
     }
 
@@ -445,15 +450,22 @@ impl str {
     /// ```
     #[unstable(feature = "round_char_boundary", issue = "93743")]
     #[inline]
-    pub fn ceil_char_boundary(&self, index: usize) -> usize {
+    pub const fn ceil_char_boundary(&self, index: usize) -> usize {
         if index >= self.len() {
             self.len()
         } else {
-            let upper_bound = Ord::min(index + 4, self.len());
-            self.as_bytes()[index..upper_bound]
-                .iter()
-                .position(|b| b.is_utf8_char_boundary())
-                .map_or(upper_bound, |pos| pos + index)
+            let mut i = index;
+            while i < self.len() {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i += 1;
+            }
+
+            // The character boundary will be within four bytes of the index
+            debug_assert!(i <= index + 3);
+
+            i
         }
     }
 
