diff options
| author | WANG Rui <wangrui@loongson.cn> | 2025-06-24 20:19:10 +0800 |
|---|---|---|
| committer | WANG Rui <wangrui@loongson.cn> | 2025-07-29 21:38:37 +0800 |
| commit | 1ceacf55a0e207c08bbedfea422c0842a4983e3d (patch) | |
| tree | 02924a57985f31fd212b76aef62ac44f5e43dd15 /library | |
| parent | ace633090349fc5075b5b0d56294de985e7d1191 (diff) | |
| download | rust-1ceacf55a0e207c08bbedfea422c0842a4983e3d.tar.gz rust-1ceacf55a0e207c08bbedfea422c0842a4983e3d.zip | |
LoongArch64 LSX fast-path for `str.contains(&str)`
Benchmark results with LLVM 21 on LA664:

```
OLD:
test bench_is_contained_in ... bench: 43.63 ns/iter (+/- 0.04)

NEW:
test bench_is_contained_in ... bench: 12.81 ns/iter (+/- 0.01)
```
Diffstat (limited to 'library')
| -rw-r--r-- | library/core/src/str/pattern.rs | 19 |
1 files changed, 16 insertions, 3 deletions
```diff
diff --git a/library/core/src/str/pattern.rs b/library/core/src/str/pattern.rs
index bcbbb11c83b..e116b138383 100644
--- a/library/core/src/str/pattern.rs
+++ b/library/core/src/str/pattern.rs
@@ -996,7 +996,10 @@ impl<'b> Pattern for &'b str {
             return haystack.as_bytes().contains(&self.as_bytes()[0]);
         }
 
-        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+        #[cfg(any(
+            all(target_arch = "x86_64", target_feature = "sse2"),
+            all(target_arch = "loongarch64", target_feature = "lsx")
+        ))]
         if self.len() <= 32 {
             if let Some(result) = simd_contains(self, haystack) {
                 return result;
@@ -1770,11 +1773,18 @@ impl TwoWayStrategy for RejectAndMatch {
 /// If we ever ship std with for x86-64-v3 or adapt this for other platforms then wider vectors
 /// should be evaluated.
 ///
+/// Similarly, on LoongArch the 128-bit LSX vector extension is the baseline,
+/// so we also use `u8x16` there. Wider vector widths may be considered
+/// for future LoongArch extensions (e.g., LASX).
+///
 /// For haystacks smaller than vector-size + needle length it falls back to
 /// a naive O(n*m) search so this implementation should not be called on larger needles.
 ///
 /// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+#[cfg(any(
+    all(target_arch = "x86_64", target_feature = "sse2"),
+    all(target_arch = "loongarch64", target_feature = "lsx")
+))]
 #[inline]
 fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
     let needle = needle.as_bytes();
@@ -1906,7 +1916,10 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
 /// # Safety
 ///
 /// Both slices must have the same length.
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86
+#[cfg(any(
+    all(target_arch = "x86_64", target_feature = "sse2"),
+    all(target_arch = "loongarch64", target_feature = "lsx")
+))]
 #[inline]
 unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
     debug_assert_eq!(x.len(), y.len());
```
