about summary refs log tree commit diff
diff options
context:
space:
mode:
author Scott McMurray <scottmcm@users.noreply.github.com> 2022-10-02 12:26:58 -0700
committer Scott McMurray <scottmcm@users.noreply.github.com> 2022-10-02 12:26:58 -0700
commit 31cd0aa823a379b6c0d0f66ba4172585d1780e8b (patch)
tree 0a8ee7464e6389545ca1651df2d14a53362c5985
parent fe217c28ffc6955f0927d8e8715d43d727debe5a (diff)
download rust-31cd0aa823a379b6c0d0f66ba4172585d1780e8b.tar.gz
rust-31cd0aa823a379b6c0d0f66ba4172585d1780e8b.zip
Do the `calloc` optimization for `Option<bool>`
Inspired by <https://old.reddit.com/r/rust/comments/xtiqj8/why_is_this_functional_version_faster_than_my_for/iqqy37b/>.
-rw-r--r-- library/alloc/src/vec/is_zero.rs 22
-rw-r--r-- src/test/codegen/vec-calloc.rs 19
2 files changed, 40 insertions, 1 deletions
diff --git a/library/alloc/src/vec/is_zero.rs b/library/alloc/src/vec/is_zero.rs
index 2e025c8a4a5..8e652d676dc 100644
--- a/library/alloc/src/vec/is_zero.rs
+++ b/library/alloc/src/vec/is_zero.rs
@@ -160,3 +160,25 @@ unsafe impl<T: IsZero> IsZero for Saturating<T> {
         self.0.is_zero()
     }
 }
+
+macro_rules! impl_for_optional_bool {
+    ($($t:ty,)+) => {$(
+        unsafe impl IsZero for $t {
+            #[inline]
+            fn is_zero(&self) -> bool {
+                // SAFETY: This is *not* a stable layout guarantee, but
+                // inside `core` we're allowed to rely on the current rustc
+                // behaviour that options of bools will be one byte with
+                // no padding, so long as they're nested less than 254 deep.
+                let raw: u8 = unsafe { core::mem::transmute(*self) };
+                raw == 0
+            }
+        }
+    )+};
+}
+impl_for_optional_bool! {
+    Option<bool>,
+    Option<Option<bool>>,
+    Option<Option<Option<bool>>>,
+    // Could go further, but not worth the metadata overhead
+}
diff --git a/src/test/codegen/vec-calloc.rs b/src/test/codegen/vec-calloc.rs
index 435a4ab5187..ae6e448f172 100644
--- a/src/test/codegen/vec-calloc.rs
+++ b/src/test/codegen/vec-calloc.rs
@@ -1,4 +1,4 @@
-// compile-flags: -O
+// compile-flags: -O -Z merge-functions=disabled
 // only-x86_64
 // ignore-debug
 // min-llvm-version: 15.0
@@ -144,6 +144,23 @@ pub fn vec_non_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
     vec![(0, 0, 'A'); n]
 }
 
+// CHECK-LABEL: @vec_option_bool
+#[no_mangle]
+pub fn vec_option_bool(n: usize) -> Vec<Option<bool>> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: ret void
+    vec![Some(false); n]
+}
+
 // Ensure that __rust_alloc_zeroed gets the right attributes for LLVM to optimize it away.
 // CHECK: declare noalias ptr @__rust_alloc_zeroed(i64, i64 allocalign) unnamed_addr [[RUST_ALLOC_ZEROED_ATTRS:#[0-9]+]]