diff options
| author | bors <bors@rust-lang.org> | 2017-06-10 09:11:36 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2017-06-10 09:11:36 +0000 |
| commit | 995f741a0e3a57d4142c0590b3266514fa0a0e29 (patch) | |
| tree | f1f259211c1e5dd2635b06d03d511121c6571101 | |
| parent | e1480499b484d142dfa704ae20bd33eae518c1d0 (diff) | |
| parent | 6d86f0c018b57fcb9ca12c801939130b7f8e441e (diff) | |
| download | rust-995f741a0e3a57d4142c0590b3266514fa0a0e29.tar.gz rust-995f741a0e3a57d4142c0590b3266514fa0a0e29.zip | |
Auto merge of #42556 - scottmcm:ctz-nz, r=BurntSushi
Get LLVM to stop generating dead assembly in next_power_of_two
It turns out that LLVM can turn `@llvm.ctlz.i64(_, true)` into `@llvm.ctlz.i64(_, false)` ([`ctlz`](http://llvm.org/docs/LangRef.html#llvm-ctlz-intrinsic)) where valuable, but never does the opposite. That leads to some silly assembly getting generated in certain cases.
A contrived-but-clear example https://is.gd/VAIKuC:
```rust
fn foo(x:u64) -> u32 {
if x == 0 { return !0; }
x.leading_zeros()
}
```
Generates
```asm
testq %rdi, %rdi
je .LBB0_1
je .LBB0_3 ; <-- wha?
bsrq %rdi, %rax
xorq $63, %rax
retq
.LBB0_1:
movl $-1, %eax
retq
.LBB0_3:
movl $64, %eax ; <-- dead
retq
```
I noticed this in `next_power_of_two`, which without this PR generates the following:
```asm
cmpq $2, %rcx
jae .LBB1_2
movl $1, %eax
retq
.LBB1_2:
decq %rcx
je .LBB1_3
bsrq %rcx, %rcx
xorq $63, %rcx
jmp .LBB1_5
.LBB1_3:
movl $64, %ecx ; <-- dead
.LBB1_5:
movq $-1, %rax
shrq %cl, %rax
incq %rax
retq
```
And with this PR becomes
```asm
cmpq $2, %rcx
jae .LBB0_2
movl $1, %eax
retq
.LBB0_2:
decq %rcx
bsrq %rcx, %rcx
xorl $63, %ecx
movq $-1, %rax
shrq %cl, %rax
incq %rax
retq
```
| -rw-r--r-- | src/libcore/intrinsics.rs | 34 | ||||
| -rw-r--r-- | src/libcore/num/mod.rs | 17 | ||||
| -rw-r--r-- | src/librustc_trans/intrinsic.rs | 8 | ||||
| -rw-r--r-- | src/librustc_typeck/check/intrinsic.rs | 3 | ||||
| -rw-r--r-- | src/test/run-pass/intrinsics-integer.rs | 37 |
5 files changed, 96 insertions, 3 deletions
diff --git a/src/libcore/intrinsics.rs b/src/libcore/intrinsics.rs index 3566bbdebc2..8188c15a282 100644 --- a/src/libcore/intrinsics.rs +++ b/src/libcore/intrinsics.rs @@ -1229,6 +1229,23 @@ extern "rust-intrinsic" { /// ``` pub fn ctlz<T>(x: T) -> T; + /// Like `ctlz`, but extra-unsafe as it returns `undef` when + /// given an `x` with value `0`. + /// + /// # Examples + /// + /// ``` + /// #![feature(core_intrinsics)] + /// + /// use std::intrinsics::ctlz_nonzero; + /// + /// let x = 0b0001_1100_u8; + /// let num_leading = unsafe { ctlz_nonzero(x) }; + /// assert_eq!(num_leading, 3); + /// ``` + #[cfg(not(stage0))] + pub fn ctlz_nonzero<T>(x: T) -> T; + /// Returns the number of trailing unset bits (zeroes) in an integer type `T`. /// /// # Examples @@ -1256,6 +1273,23 @@ extern "rust-intrinsic" { /// ``` pub fn cttz<T>(x: T) -> T; + /// Like `cttz`, but extra-unsafe as it returns `undef` when + /// given an `x` with value `0`. + /// + /// # Examples + /// + /// ``` + /// #![feature(core_intrinsics)] + /// + /// use std::intrinsics::cttz_nonzero; + /// + /// let x = 0b0011_1000_u8; + /// let num_trailing = unsafe { cttz_nonzero(x) }; + /// assert_eq!(num_trailing, 3); + /// ``` + #[cfg(not(stage0))] + pub fn cttz_nonzero<T>(x: T) -> T; + /// Reverses the bytes in an integer type `T`. pub fn bswap<T>(x: T) -> T; diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index b76eac9e51f..62d75445cc9 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -1262,6 +1262,7 @@ macro_rules! uint_impl { ($SelfT:ty, $ActualT:ty, $BITS:expr, $ctpop:path, $ctlz:path, + $ctlz_nonzero:path, $cttz:path, $bswap:path, $add_with_overflow:path, @@ -2184,6 +2185,7 @@ macro_rules! uint_impl { // This method cannot overflow, as in the `next_power_of_two` // overflow cases it instead ends up returning the maximum value // of the type, and can return 0 for 0. + #[inline] fn one_less_than_next_power_of_two(self) -> Self { if self <= 1 { return 0; } @@ -2192,7 +2194,7 @@ macro_rules! uint_impl { // (such as intel pre-haswell) have more efficient ctlz // intrinsics when the argument is non-zero. let p = self - 1; - let z = p.leading_zeros(); + let z = unsafe { $ctlz_nonzero(p) }; <$SelfT>::max_value() >> z } @@ -2236,11 +2238,17 @@ macro_rules! uint_impl { } } +#[cfg(stage0)] +unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz(x) } +#[cfg(not(stage0))] +unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz_nonzero(x) } + #[lang = "u8"] impl u8 { uint_impl! { u8, u8, 8, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2253,6 +2261,7 @@ impl u16 { uint_impl! { u16, u16, 16, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2265,6 +2274,7 @@ impl u32 { uint_impl! { u32, u32, 32, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2277,6 +2287,7 @@ impl u64 { uint_impl! { u64, u64, 64, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2289,6 +2300,7 @@ impl u128 { uint_impl! { u128, u128, 128, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2302,6 +2314,7 @@ impl usize { uint_impl! { usize, u16, 16, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2314,6 +2327,7 @@ impl usize { uint_impl! { usize, u32, 32, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, @@ -2327,6 +2341,7 @@ impl usize { uint_impl! { usize, u64, 64, intrinsics::ctpop, intrinsics::ctlz, + ctlz_nonzero, intrinsics::cttz, intrinsics::bswap, intrinsics::add_with_overflow, diff --git a/src/librustc_trans/intrinsic.rs b/src/librustc_trans/intrinsic.rs index f2e6aa3ef00..de908bb24a7 100644 --- a/src/librustc_trans/intrinsic.rs +++ b/src/librustc_trans/intrinsic.rs @@ -267,7 +267,7 @@ pub fn trans_intrinsic_call<'a, 'tcx>(bcx: &Builder<'a, 'tcx>, }; bcx.call(expect, &[llargs[0], C_i32(ccx, rw), llargs[1], C_i32(ccx, cache_type)], None) }, - "ctlz" | "cttz" | "ctpop" | "bswap" | + "ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "ctpop" | "bswap" | "add_with_overflow" | "sub_with_overflow" | "mul_with_overflow" | "overflowing_add" | "overflowing_sub" | "overflowing_mul" | "unchecked_div" | "unchecked_rem" | "unchecked_shl" | "unchecked_shr" => { @@ -280,6 +280,12 @@ pub fn trans_intrinsic_call<'a, 'tcx>(bcx: &Builder<'a, 'tcx>, let llfn = ccx.get_intrinsic(&format!("llvm.{}.i{}", name, width)); bcx.call(llfn, &[llargs[0], y], None) } + "ctlz_nonzero" | "cttz_nonzero" => { + let y = C_bool(bcx.ccx, true); + let llvm_name = &format!("llvm.{}.i{}", &name[..4], width); + let llfn = ccx.get_intrinsic(llvm_name); + bcx.call(llfn, &[llargs[0], y], None) + } "ctpop" => bcx.call(ccx.get_intrinsic(&format!("llvm.ctpop.i{}", width)), &llargs, None), "bswap" => { diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index daf202cd797..4d9f50b0fc0 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -272,7 +272,8 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, "volatile_store" => (1, vec![ tcx.mk_mut_ptr(param(0)), param(0) ], tcx.mk_nil()), - "ctpop" | "ctlz" | "cttz" | "bswap" => (1, vec![param(0)], param(0)), + "ctpop" | "ctlz" | "ctlz_nonzero" | "cttz" | "cttz_nonzero" | "bswap" => + (1, vec![param(0)], param(0)), "add_with_overflow" | "sub_with_overflow" | "mul_with_overflow" => (1, vec![param(0), param(0)], diff --git a/src/test/run-pass/intrinsics-integer.rs b/src/test/run-pass/intrinsics-integer.rs index 759dc515456..4896f02da20 100644 --- a/src/test/run-pass/intrinsics-integer.rs +++ b/src/test/run-pass/intrinsics-integer.rs @@ -14,7 +14,9 @@ mod rusti { extern "rust-intrinsic" { pub fn ctpop<T>(x: T) -> T; pub fn ctlz<T>(x: T) -> T; + pub fn ctlz_nonzero<T>(x: T) -> T; pub fn cttz<T>(x: T) -> T; + pub fn cttz_nonzero<T>(x: T) -> T; pub fn bswap<T>(x: T) -> T; } } @@ -68,6 +70,21 @@ pub fn main() { assert_eq!(ctlz(100u32), 25); assert_eq!(ctlz(100i32), 25); assert_eq!(ctlz(100u64), 57); assert_eq!(ctlz(100i64), 57); + assert_eq!(ctlz_nonzero(1u8), 7); assert_eq!(ctlz_nonzero(1i8), 7); + assert_eq!(ctlz_nonzero(1u16), 15); assert_eq!(ctlz_nonzero(1i16), 15); + assert_eq!(ctlz_nonzero(1u32), 31); assert_eq!(ctlz_nonzero(1i32), 31); + assert_eq!(ctlz_nonzero(1u64), 63); assert_eq!(ctlz_nonzero(1i64), 63); + + assert_eq!(ctlz_nonzero(10u8), 4); assert_eq!(ctlz_nonzero(10i8), 4); + assert_eq!(ctlz_nonzero(10u16), 12); assert_eq!(ctlz_nonzero(10i16), 12); + assert_eq!(ctlz_nonzero(10u32), 28); assert_eq!(ctlz_nonzero(10i32), 28); + assert_eq!(ctlz_nonzero(10u64), 60); assert_eq!(ctlz_nonzero(10i64), 60); + + assert_eq!(ctlz_nonzero(100u8), 1); assert_eq!(ctlz_nonzero(100i8), 1); + assert_eq!(ctlz_nonzero(100u16), 9); assert_eq!(ctlz_nonzero(100i16), 9); + assert_eq!(ctlz_nonzero(100u32), 25); assert_eq!(ctlz_nonzero(100i32), 25); + assert_eq!(ctlz_nonzero(100u64), 57); assert_eq!(ctlz_nonzero(100i64), 57); + assert_eq!(cttz(-1i8 as u8), 0); assert_eq!(cttz(-1i8), 0); assert_eq!(cttz(-1i16 as u16), 0); assert_eq!(cttz(-1i16), 0); assert_eq!(cttz(-1i32 as u32), 0); assert_eq!(cttz(-1i32), 0); @@ -93,6 +110,26 @@ pub fn main() { assert_eq!(cttz(100u32), 2); assert_eq!(cttz(100i32), 2); assert_eq!(cttz(100u64), 2); assert_eq!(cttz(100i64), 2); + assert_eq!(cttz_nonzero(-1i8 as u8), 0); assert_eq!(cttz_nonzero(-1i8), 0); + assert_eq!(cttz_nonzero(-1i16 as u16), 0); assert_eq!(cttz_nonzero(-1i16), 0); + assert_eq!(cttz_nonzero(-1i32 as u32), 0); assert_eq!(cttz_nonzero(-1i32), 0); + assert_eq!(cttz_nonzero(-1i64 as u64), 0); assert_eq!(cttz_nonzero(-1i64), 0); + + assert_eq!(cttz_nonzero(1u8), 0); assert_eq!(cttz_nonzero(1i8), 0); + assert_eq!(cttz_nonzero(1u16), 0); assert_eq!(cttz_nonzero(1i16), 0); + assert_eq!(cttz_nonzero(1u32), 0); assert_eq!(cttz_nonzero(1i32), 0); + assert_eq!(cttz_nonzero(1u64), 0); assert_eq!(cttz_nonzero(1i64), 0); + + assert_eq!(cttz_nonzero(10u8), 1); assert_eq!(cttz_nonzero(10i8), 1); + assert_eq!(cttz_nonzero(10u16), 1); assert_eq!(cttz_nonzero(10i16), 1); + assert_eq!(cttz_nonzero(10u32), 1); assert_eq!(cttz_nonzero(10i32), 1); + assert_eq!(cttz_nonzero(10u64), 1); assert_eq!(cttz_nonzero(10i64), 1); + + assert_eq!(cttz_nonzero(100u8), 2); assert_eq!(cttz_nonzero(100i8), 2); + assert_eq!(cttz_nonzero(100u16), 2); assert_eq!(cttz_nonzero(100i16), 2); + assert_eq!(cttz_nonzero(100u32), 2); assert_eq!(cttz_nonzero(100i32), 2); + assert_eq!(cttz_nonzero(100u64), 2); assert_eq!(cttz_nonzero(100i64), 2); + assert_eq!(bswap(0x0Au8), 0x0A); // no-op assert_eq!(bswap(0x0Ai8), 0x0A); // no-op assert_eq!(bswap(0x0A0Bu16), 0x0B0A); |
