about summary refs log tree commit diff
diff options
context:
space:
mode:
author Simonas Kazlauskas <git@kazlauskas.me> 2020-08-16 17:10:54 +0300
committer Simonas Kazlauskas <git@kazlauskas.me> 2020-08-16 21:31:48 +0300
commit 5d22b18bf28a54586008e9d8d4bfd72012c00e2e (patch)
tree 4ca3500e0654687aee1d933df544dd4dcdeda021
parent e7271da69a7f69d9aef931f30fbb403ffd49436e (diff)
download rust-5d22b18bf28a54586008e9d8d4bfd72012c00e2e.tar.gz
download rust-5d22b18bf28a54586008e9d8d4bfd72012c00e2e.zip
Improve codegen of align_offset when stride == 1
Previously, checking for `pmoda == 0` would get LLVM to generate branchy
code, even though for `stride == 1` the offset can be computed without such
a branch, effectively as `-p % a`.

For well-known (constant) alignments, with the new ordering of these
conditionals, we end up generating 2 to 3 cheap instructions on x86_64:

    movq    %rdi, %rax
    negl    %eax
    andl    $7, %eax

instead of 5+ as previously.

For unknown alignments the new code also generates just 3 instructions:

    negq    %rdi
    leaq    -1(%rsi), %rax
    andq    %rdi, %rax
-rw-r--r--library/core/src/ptr/mod.rs24
1 files changed, 11 insertions, 13 deletions
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 78308f97461..68b5d1df71c 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
 
     /// Calculate multiplicative modular inverse of `x` modulo `m`.
     ///
-    /// This implementation is tailored for align_offset and has following preconditions:
+    /// This implementation is tailored for `align_offset` and has following preconditions:
     ///
     /// * `m` is a power-of-two;
     /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
@@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
     }
 
     let stride = mem::size_of::<T>();
-    // SAFETY: `a` is a power-of-two, hence non-zero.
+    // SAFETY: `a` is a power-of-two, therefore non-zero.
     let a_minus_one = unsafe { unchecked_sub(a, 1) };
-    let pmoda = p as usize & a_minus_one;
+    if stride == 1 {
+        // `stride == 1` case can be computed more efficiently through `-p (mod a)`.
+        return wrapping_sub(0, p as usize) & a_minus_one;
+    }
 
+    let pmoda = p as usize & a_minus_one;
     if pmoda == 0 {
         // Already aligned. Yay!
         return 0;
-    }
-
-    if stride <= 1 {
-        return if stride == 0 {
-            // If the pointer is not aligned, and the element is zero-sized, then no amount of
-            // elements will ever align the pointer.
-            !0
-        } else {
-            wrapping_sub(a, pmoda)
-        };
+    } else if stride == 0 {
+        // If the pointer is not aligned, and the element is zero-sized, then no amount of
+        // elements will ever align the pointer.
+        return usize::MAX;
     }
 
     let smoda = stride & a_minus_one;