| author | Simonas Kazlauskas <git@kazlauskas.me> | 2020-08-16 17:10:54 +0300 |
|---|---|---|
| committer | Simonas Kazlauskas <git@kazlauskas.me> | 2020-08-16 21:31:48 +0300 |
| commit | 5d22b18bf28a54586008e9d8d4bfd72012c00e2e (patch) | |
| tree | 4ca3500e0654687aee1d933df544dd4dcdeda021 | |
| parent | e7271da69a7f69d9aef931f30fbb403ffd49436e (diff) | |
| download | rust-5d22b18bf28a54586008e9d8d4bfd72012c00e2e.tar.gz rust-5d22b18bf28a54586008e9d8d4bfd72012c00e2e.zip | |
Improve codegen of align_offset when stride == 1
Previously, checking for `pmoda == 0` first would get LLVM to generate branchy
code, even though for `stride == 1` the offset can be computed without such a
branch as, effectively, `-p % a`.
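
As a standalone illustration (not the library code itself), the identity behind this can be checked directly: for a power-of-two `a`, the offset `-p (mod a)` equals `p.wrapping_neg() & (a - 1)`, which needs no branch.

```rust
// Sketch only: compares the branch-free formulation against a naive one.
fn branch_free_offset(p: usize, a: usize) -> usize {
    debug_assert!(a.is_power_of_two());
    // `-p (mod a)`: wrapping negation mod 2^64 reduced mod the power-of-two `a`.
    p.wrapping_neg() & (a - 1)
}

fn naive_offset(p: usize, a: usize) -> usize {
    // "Distance up to the next multiple of `a`" spelled out with a branch.
    if p % a == 0 { 0 } else { a - p % a }
}

fn main() {
    for &a in &[1usize, 2, 4, 8, 4096] {
        for p in 0usize..1000 {
            assert_eq!(branch_free_offset(p, a), naive_offset(p, a));
        }
    }
    println!("branch-free `-p (mod a)` matches the naive computation");
}
```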
For well-known (constant) alignments, with the new ordering of these
conditionals we end up generating 2 to 3 cheap instructions on x86_64:

```asm
movq %rdi, %rax
negl %eax
andl $7, %eax
```

instead of 5+ as previously.
For unknown alignments the new code also generates just 3 instructions:

```asm
negq %rdi
leaq -1(%rsi), %rax
andq %rdi, %rax
```
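
As an aside (not part of the commit), one way to observe sequences like these is to compile small wrappers around `<*const T>::align_offset` at `-O` on x86_64, e.g. on godbolt; the function names below are illustrative only, and the exact lowering depends on the compiler version.

```rust
// Constant alignment: with this change, expected to lower to roughly `neg` + `and $7`.
pub fn offset_to_8(p: *const u8) -> usize {
    p.align_offset(8)
}

// Alignment only known at run time (it must still be a power of two, otherwise
// `align_offset` panics): expected `neg` + `lea -1(a)` + `and`.
pub fn offset_to(p: *const u8, a: usize) -> usize {
    p.align_offset(a)
}
```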
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | library/core/src/ptr/mod.rs | 24 |

1 file changed, 11 insertions, 13 deletions
```diff
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 78308f97461..68b5d1df71c 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
 
     /// Calculate multiplicative modular inverse of `x` modulo `m`.
     ///
-    /// This implementation is tailored for align_offset and has following preconditions:
+    /// This implementation is tailored for `align_offset` and has following preconditions:
     ///
     /// * `m` is a power-of-two;
     /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
@@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
     }
 
     let stride = mem::size_of::<T>();
-    // SAFETY: `a` is a power-of-two, hence non-zero.
+    // SAFETY: `a` is a power-of-two, therefore non-zero.
     let a_minus_one = unsafe { unchecked_sub(a, 1) };
-    let pmoda = p as usize & a_minus_one;
 
+    if stride == 1 {
+        // `stride == 1` case can be computed more efficiently through `-p (mod a)`.
+        return wrapping_sub(0, p as usize) & a_minus_one;
+    }
+    let pmoda = p as usize & a_minus_one;
     if pmoda == 0 {
         // Already aligned. Yay!
         return 0;
-    }
-
-    if stride <= 1 {
-        return if stride == 0 {
-            // If the pointer is not aligned, and the element is zero-sized, then no amount of
-            // elements will ever align the pointer.
-            !0
-        } else {
-            wrapping_sub(a, pmoda)
-        };
+    } else if stride == 0 {
+        // If the pointer is not aligned, and the element is zero-sized, then no amount of
+        // elements will ever align the pointer.
+        return usize::MAX;
     }
 
     let smoda = stride & a_minus_one;
```
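
For reference, a minimal sketch (outside the library, with the helpers simplified to plain `usize` arithmetic) checking that the new early return for `stride == 1` agrees with what the old branchy path returned: `0` when already aligned and `a - (p % a)` otherwise.

```rust
// Old formulation for stride == 1: branch on `pmoda == 0`.
fn old_style(p: usize, a: usize) -> usize {
    let pmoda = p & (a - 1);
    if pmoda == 0 { 0 } else { a.wrapping_sub(pmoda) }
}

// New formulation: single branch-free expression, `-p (mod a)`.
fn new_style(p: usize, a: usize) -> usize {
    0usize.wrapping_sub(p) & (a - 1)
}

fn main() {
    for &a in &[1usize, 2, 8, 64, 4096] {
        for p in 0usize..1000 {
            assert_eq!(old_style(p, a), new_style(p, a));
        }
    }
    println!("old and new `stride == 1` paths agree");
}
```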
