about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2023-05-08 23:47:39 +0000
committerbors <bors@rust-lang.org>2023-05-08 23:47:39 +0000
commit90c02c1bc119890f63dedcb4bee5a278a85ff62f (patch)
tree73791a158ba7239fcca776d8ac66ffb7cad71f7d
parent2f2c438dce75d8cc532c3baa849eeddc0901802c (diff)
parentb5ee324d79b310761b98173825895b16b859361b (diff)
downloadrust-90c02c1bc119890f63dedcb4bee5a278a85ff62f.tar.gz
rust-90c02c1bc119890f63dedcb4bee5a278a85ff62f.zip
Auto merge of #111296 - Sp00ph:const_gcd, r=nagisa,Mark-Simulacrum
Always const-evaluate the GCD in `slice::align_to_offsets`

Use an inline `const`-block to force the compiler to calculate the GCD at compile time, even in debug mode. This shouldn't affect the behavior of the program at all, but it drastically cuts down on the number of instructions emitted with optimizations disabled.

With the current implementation, a single `slice::align_to` instantiation (specifically `<[u8]>::align_to::<u128>()`) generates 676 instructions (on x86-64). Forcing the GCD computation to be const cuts it down to 327 instructions, so just over 50% less. This is obviously not representative of actual runtime gains, but I still see it as a significant win as long as it doesn't degrade compile times.

Not having to worry about LLVM const-evaluating the GCD function also allows it to use the textbook recursive euclidean algorithm instead of a much more complicated iterative implementation with multiple `unsafe`-blocks.
-rw-r--r--library/core/src/slice/mod.rs43
1 files changed, 6 insertions, 37 deletions
diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs
index 4c891ba550f..c2e9ba273a5 100644
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@@ -3478,44 +3478,13 @@ impl<T> [T] {
         // Ts = size_of::<U> / gcd(size_of::<T>, size_of::<U>)
         //
         // Luckily since all this is constant-evaluated... performance here matters not!
-        #[inline]
-        fn gcd(a: usize, b: usize) -> usize {
-            use crate::intrinsics;
-            // iterative stein’s algorithm
-            // We should still make this `const fn` (and revert to recursive algorithm if we do)
-            // because relying on llvm to consteval all this is… well, it makes me uncomfortable.
-
-            // SAFETY: `a` and `b` are checked to be non-zero values.
-            let (ctz_a, mut ctz_b) = unsafe {
-                if a == 0 {
-                    return b;
-                }
-                if b == 0 {
-                    return a;
-                }
-                (intrinsics::cttz_nonzero(a), intrinsics::cttz_nonzero(b))
-            };
-            let k = ctz_a.min(ctz_b);
-            let mut a = a >> ctz_a;
-            let mut b = b;
-            loop {
-                // remove all factors of 2 from b
-                b >>= ctz_b;
-                if a > b {
-                    mem::swap(&mut a, &mut b);
-                }
-                b = b - a;
-                // SAFETY: `b` is checked to be non-zero.
-                unsafe {
-                    if b == 0 {
-                        break;
-                    }
-                    ctz_b = intrinsics::cttz_nonzero(b);
-                }
-            }
-            a << k
+        const fn gcd(a: usize, b: usize) -> usize {
+            if b == 0 { a } else { gcd(b, a % b) }
         }
-        let gcd: usize = gcd(mem::size_of::<T>(), mem::size_of::<U>());
+
+        // Explicitly wrap the function call in a const block so it gets
+        // constant-evaluated even in debug mode.
+        let gcd: usize = const { gcd(mem::size_of::<T>(), mem::size_of::<U>()) };
         let ts: usize = mem::size_of::<U>() / gcd;
         let us: usize = mem::size_of::<T>() / gcd;