diff options
| author | 许杰友 Jieyou Xu (Joe) <39484203+jieyouxu@users.noreply.github.com> | 2025-05-27 01:29:20 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-27 01:29:20 +0800 |
| commit | 408dc51f9798241da1e5d95b3add232e8b5b54b6 (patch) | |
| tree | a6c06a6812675151a392bb5507b4e0f0be587493 | |
| parent | afb57cadda3d1bb631951cc1a81fa6a13dc4a810 (diff) | |
| parent | 245bf503e2a948ac98170516d11df632e85a948b (diff) | |
| download | rust-408dc51f9798241da1e5d95b3add232e8b5b54b6.tar.gz rust-408dc51f9798241da1e5d95b3add232e8b5b54b6.zip | |
Rollup merge of #141516 - bend-n:okay, r=workingjubilee
speed up charsearcher for ascii chars attempt at fixing rust-lang/rust#82471 this implementation should be valid because ascii characters are always one byte and there are no continuation bytes that overlap with ascii characters im not completely sure that this is _always_ an improvement but it seems to be an improvement for this case and i dont think it can significantly regress any cases
| -rw-r--r-- | library/core/src/str/iter.rs | 2 | ||||
| -rw-r--r-- | library/core/src/str/pattern.rs | 32 |
2 files changed, 32 insertions, 2 deletions
diff --git a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs index 425c4eaee28..49c581f352e 100644 --- a/library/core/src/str/iter.rs +++ b/library/core/src/str/iter.rs @@ -656,7 +656,7 @@ impl<'a, P: Pattern> SplitInternal<'a, P> { None } - #[inline] + #[inline(always)] fn next(&mut self) -> Option<&'a str> { if self.finished { return None; diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs index bcbbb11c83b..e8189a2187b 100644 --- a/library/core/src/str/pattern.rs +++ b/library/core/src/str/pattern.rs @@ -429,8 +429,23 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> { SearchStep::Done } } - #[inline] + #[inline(always)] fn next_match(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + return match self + .haystack + .as_bytes() + .get(self.finger..self.finger_back)? + .iter() + .position(|x| *x == self.utf8_encoded[0]) + { + Some(x) => { + self.finger += x + 1; + Some((self.finger - 1, self.finger)) + } + None => None, + }; + } loop { // get the haystack after the last character found let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?; @@ -498,6 +513,21 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> { } #[inline] fn next_match_back(&mut self) -> Option<(usize, usize)> { + if self.utf8_size == 1 { + return match self + .haystack + .get(self.finger..self.finger_back)? + .as_bytes() + .iter() + .rposition(|&x| x == self.utf8_encoded[0]) + { + Some(x) => { + self.finger_back = self.finger + x; + Some((self.finger_back, self.finger_back + 1)) + } + None => None, + }; + } let haystack = self.haystack.as_bytes(); loop { // get the haystack up to but not including the last character searched |
