about summary refs log tree commit diff
diff options
context:
space:
mode:
author许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com>2025-05-27 01:29:20 +0800
committerGitHub <noreply@github.com>2025-05-27 01:29:20 +0800
commit408dc51f9798241da1e5d95b3add232e8b5b54b6 (patch)
treea6c06a6812675151a392bb5507b4e0f0be587493
parentafb57cadda3d1bb631951cc1a81fa6a13dc4a810 (diff)
parent245bf503e2a948ac98170516d11df632e85a948b (diff)
downloadrust-408dc51f9798241da1e5d95b3add232e8b5b54b6.tar.gz
rust-408dc51f9798241da1e5d95b3add232e8b5b54b6.zip
Rollup merge of #141516 - bend-n:okay, r=workingjubilee
Speed up `CharSearcher` for ASCII chars

Attempt at fixing rust-lang/rust#82471.

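This implementation should be valid because ASCII characters are always encoded as a single byte in UTF-8, and no byte of a multi-byte sequence (neither lead nor continuation bytes) overlaps with the ASCII range, so a byte-wise match on an ASCII needle is always a match on a whole character.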

I'm not completely sure that this is _always_ an improvement, but it seems to be an improvement for this case, and I don't think it can significantly regress any cases.
-rw-r--r--library/core/src/str/iter.rs2
-rw-r--r--library/core/src/str/pattern.rs32
2 files changed, 32 insertions, 2 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 425c4eaee28..49c581f352e 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> {
         None
     }
 
-    #[inline]
+    #[inline(always)]
     fn next(&mut self) -> Option<&'a str> {
         if self.finished {
             return None;
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index bcbbb11c83b..e8189a2187b 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -429,8 +429,23 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
             SearchStep::Done
         }
     }
-    #[inline]
+    #[inline(always)]
     fn next_match(&mut self) -> Option<(usize, usize)> {
+        if self.utf8_size == 1 {
+            return match self
+                .haystack
+                .as_bytes()
+                .get(self.finger..self.finger_back)?
+                .iter()
+                .position(|x| *x == self.utf8_encoded[0])
+            {
+                Some(x) => {
+                    self.finger += x + 1;
+                    Some((self.finger - 1, self.finger))
+                }
+                None => None,
+            };
+        }
         loop {
             // get the haystack after the last character found
             let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
@@ -498,6 +513,21 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
     }
     #[inline]
     fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        if self.utf8_size == 1 {
+            return match self
+                .haystack
+                .get(self.finger..self.finger_back)?
+                .as_bytes()
+                .iter()
+                .rposition(|&x| x == self.utf8_encoded[0])
+            {
+                Some(x) => {
+                    self.finger_back = self.finger + x;
+                    Some((self.finger_back, self.finger_back + 1))
+                }
+                None => None,
+            };
+        }
         let haystack = self.haystack.as_bytes();
         loop {
             // get the haystack up to but not including the last character searched