about summary refs log tree commit diff
diff options
context:
space:
mode:
authorGnomedDev <david2005thomas@gmail.com>2024-02-13 18:27:05 +0000
committerGnomedDev <david2005thomas@gmail.com>2024-02-13 18:28:48 +0000
commit601f2d192eb54722b0dd4619d6b70bc173354049 (patch)
tree036592c15bd4802370d7b2cc044f9ab5a4951df2
parente9271846294c4ee5bd7706df68180320c0b5ff20 (diff)
downloadrust-601f2d192eb54722b0dd4619d6b70bc173354049.tar.gz
rust-601f2d192eb54722b0dd4619d6b70bc173354049.zip
Store core::str::CharSearcher::utf8_size as u8
-rw-r--r--library/core/src/str/pattern.rs34
1 files changed, 23 insertions, 11 deletions
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index caa54e00f31..cc66da25795 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -40,6 +40,7 @@
 
 use crate::cmp;
 use crate::cmp::Ordering;
+use crate::convert::TryInto as _;
 use crate::fmt;
 use crate::slice::memchr;
 
@@ -370,11 +371,17 @@ pub struct CharSearcher<'a> {
 
     // safety invariant: `utf8_size` must be less than 5
     /// The number of bytes `needle` takes up when encoded in utf8.
-    utf8_size: usize,
+    utf8_size: u8,
     /// A utf8 encoded copy of the `needle`
     utf8_encoded: [u8; 4],
 }
 
+impl CharSearcher<'_> {
+    fn utf8_size(&self) -> usize {
+        self.utf8_size.into()
+    }
+}
+
 unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
     #[inline]
     fn haystack(&self) -> &'a str {
@@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
             let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
             // the last byte of the utf8 encoded needle
             // SAFETY: we have an invariant that `utf8_size < 5`
-            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
+            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
             if let Some(index) = memchr::memchr(last_byte, bytes) {
                 // The new finger is the index of the byte we found,
                 // plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
                 // find something. When we find something the `finger` will be set
                 // to a UTF8 boundary.
                 self.finger += index + 1;
-                if self.finger >= self.utf8_size {
-                    let found_char = self.finger - self.utf8_size;
+                if self.finger >= self.utf8_size() {
+                    let found_char = self.finger - self.utf8_size();
                     if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
-                        if slice == &self.utf8_encoded[0..self.utf8_size] {
+                        if slice == &self.utf8_encoded[0..self.utf8_size()] {
                             return Some((found_char, self.finger));
                         }
                     }
@@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
             let bytes = haystack.get(self.finger..self.finger_back)?;
             // the last byte of the utf8 encoded needle
             // SAFETY: we have an invariant that `utf8_size < 5`
-            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
+            let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
             if let Some(index) = memchr::memrchr(last_byte, bytes) {
                 // we searched a slice that was offset by self.finger,
                 // add self.finger to recoup the original index
@@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
                 // char in the paradigm of reverse iteration). For
                 // multibyte chars we need to skip down by the number of more
                 // bytes they have than ASCII
-                let shift = self.utf8_size - 1;
+                let shift = self.utf8_size() - 1;
                 if index >= shift {
                     let found_char = index - shift;
-                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
-                        if slice == &self.utf8_encoded[0..self.utf8_size] {
+                    if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) {
+                        if slice == &self.utf8_encoded[0..self.utf8_size()] {
                             // move finger to before the character found (i.e., at its start index)
                             self.finger_back = found_char;
-                            return Some((self.finger_back, self.finger_back + self.utf8_size));
+                            return Some((self.finger_back, self.finger_back + self.utf8_size()));
                         }
                     }
                 }
@@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char {
     #[inline]
     fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
         let mut utf8_encoded = [0; 4];
-        let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
+        let utf8_size = self
+            .encode_utf8(&mut utf8_encoded)
+            .len()
+            .try_into()
+            .expect("char len should be less than 255");
+
         CharSearcher {
             haystack,
             finger: 0,