about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2020-10-01 18:16:02 +0000
committerbors <bors@rust-lang.org>2020-10-01 18:16:02 +0000
commit8fe73e80d762bc575040239fc05fb1c612873554 (patch)
treea0f1265e371692d886598d75e1c2a0f962edf2a6
parent2ad6187ce51eb3a304ca448b8676870f95ab5b11 (diff)
parentde623bfaf77a69d86e1561f6b4e0fdd9d193cf60 (diff)
downloadrust-8fe73e80d762bc575040239fc05fb1c612873554.tar.gz
rust-8fe73e80d762bc575040239fc05fb1c612873554.zip
Auto merge of #76971 - bugadani:issue-75659, r=Amanieu
Refactor memchr to allow optimization

Closes #75659

The implementation already uses naive search if the slice if short enough, but the case is complicated enough to not be optimized away. This PR refactors memchr so that it exists early when the slice is short enough.

Codegen-wise, as shown in #75659, memchr was not inlined previously so the only way I could find to test this is to check if there is no memchr call. Let me know if there is a more robust solution here.
-rw-r--r--library/core/src/slice/cmp.rs2
-rw-r--r--library/core/src/slice/memchr.rs40
-rw-r--r--library/core/src/slice/mod.rs1
-rw-r--r--src/test/codegen/issue-75659.rs63
4 files changed, 90 insertions, 16 deletions
diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
index 27a358bddaf..ed26c59c17c 100644
--- a/library/core/src/slice/cmp.rs
+++ b/library/core/src/slice/cmp.rs
@@ -268,12 +268,14 @@ where
 }
 
 impl SliceContains for u8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         memchr::memchr(*self, x).is_some()
     }
 }
 
 impl SliceContains for i8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         let byte = *self as u8;
         // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`
diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs
index 3b13ed5fed3..0c0f1750264 100644
--- a/library/core/src/slice/memchr.rs
+++ b/library/core/src/slice/memchr.rs
@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
 // Use truncation.
 const LO_USIZE: usize = LO_U64 as usize;
 const HI_USIZE: usize = HI_U64 as usize;
+const USIZE_BYTES: usize = mem::size_of::<usize>();
 
 /// Returns `true` if `x` contains any zero byte.
 ///
@@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
 }
 
 /// Returns the first index matching the byte `x` in `text`.
+#[inline]
 pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Fast path for small slices
+    if text.len() < 2 * USIZE_BYTES {
+        return text.iter().position(|elt| *elt == x);
+    }
+
+    memchr_general_case(x, text)
+}
+
+fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
     // Scan for a single byte value by reading two `usize` words at a time.
     //
     // Split `text` in three parts
     // - unaligned initial part, before the first word aligned address in text
     // - body, scan by 2 words at a time
     // - the last remaining part, < 2 word size
+
+    // search up to an aligned boundary
     let len = text.len();
     let ptr = text.as_ptr();
-    let usize_bytes = mem::size_of::<usize>();
+    let mut offset = ptr.align_offset(USIZE_BYTES);
 
-    // search up to an aligned boundary
-    let mut offset = ptr.align_offset(usize_bytes);
     if offset > 0 {
         offset = cmp::min(offset, len);
         if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
 
     // search the body of the text
     let repeated_x = repeat_byte(x);
+    while offset <= len - 2 * USIZE_BYTES {
+        unsafe {
+            let u = *(ptr.add(offset) as *const usize);
+            let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
 
-    if len >= 2 * usize_bytes {
-        while offset <= len - 2 * usize_bytes {
-            unsafe {
-                let u = *(ptr.add(offset) as *const usize);
-                let v = *(ptr.add(offset + usize_bytes) as *const usize);
-
-                // break if there is a matching byte
-                let zu = contains_zero_byte(u ^ repeated_x);
-                let zv = contains_zero_byte(v ^ repeated_x);
-                if zu || zv {
-                    break;
-                }
+            // break if there is a matching byte
+            let zu = contains_zero_byte(u ^ repeated_x);
+            let zv = contains_zero_byte(v ^ repeated_x);
+            if zu || zv {
+                break;
             }
-            offset += usize_bytes * 2;
         }
+        offset += USIZE_BYTES * 2;
     }
 
     // Find the byte after the point the body loop stopped.
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index 12dcd6c6ba8..4192632fe57 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -1636,6 +1636,7 @@ impl<T> [T] {
     /// assert!(!v.iter().any(|e| e == "hi"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
     pub fn contains(&self, x: &T) -> bool
     where
         T: PartialEq,
diff --git a/src/test/codegen/issue-75659.rs b/src/test/codegen/issue-75659.rs
new file mode 100644
index 00000000000..d093c841d68
--- /dev/null
+++ b/src/test/codegen/issue-75659.rs
@@ -0,0 +1,63 @@
+// This test checks that the call to memchr/slice_contains is optimized away
+// when searching in small slices.
+
+// compile-flags: -O
+// only-x86_64
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @foo1
+#[no_mangle]
+pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo2
+#[no_mangle]
+pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo3
+#[no_mangle]
+pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo4
+#[no_mangle]
+pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8
+#[no_mangle]
+pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8_i8
+#[no_mangle]
+pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    !data.contains(&x)
+}
+
+// Check that the general case isn't inlined
+// CHECK-LABEL: @foo80
+#[no_mangle]
+pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
+    // CHECK: call core::slice::memchr
+    data.contains(&x)
+}