about summary refs log tree commit diff
diff options
context:
space:
mode:
authorScott McMurray <scottmcm@users.noreply.github.com>2017-06-08 21:29:45 -0700
committerScott McMurray <scottmcm@users.noreply.github.com>2017-06-08 23:01:39 -0700
commit6d86f0c018b57fcb9ca12c801939130b7f8e441e (patch)
tree1563fcaaa1b22911c04c545af1969738a7d293f9
parent13e24003472c13636d4350a530d6e6495775afeb (diff)
downloadrust-6d86f0c018b57fcb9ca12c801939130b7f8e441e.tar.gz
rust-6d86f0c018b57fcb9ca12c801939130b7f8e441e.zip
Use ctlz_nonzero to improve ASM from next_power_of_two
-rw-r--r--src/libcore/num/mod.rs17
1 files changed, 16 insertions, 1 deletions
diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs
index b76eac9e51f..62d75445cc9 100644
--- a/src/libcore/num/mod.rs
+++ b/src/libcore/num/mod.rs
@@ -1262,6 +1262,7 @@ macro_rules! uint_impl {
     ($SelfT:ty, $ActualT:ty, $BITS:expr,
      $ctpop:path,
      $ctlz:path,
+     $ctlz_nonzero:path,
      $cttz:path,
      $bswap:path,
      $add_with_overflow:path,
@@ -2184,6 +2185,7 @@ macro_rules! uint_impl {
         // This method cannot overflow, as in the `next_power_of_two`
         // overflow cases it instead ends up returning the maximum value
         // of the type, and can return 0 for 0.
+        #[inline]
         fn one_less_than_next_power_of_two(self) -> Self {
             if self <= 1 { return 0; }
 
@@ -2192,7 +2194,7 @@ macro_rules! uint_impl {
             // (such as intel pre-haswell) have more efficient ctlz
             // intrinsics when the argument is non-zero.
             let p = self - 1;
-            let z = p.leading_zeros();
+            let z = unsafe { $ctlz_nonzero(p) };
             <$SelfT>::max_value() >> z
         }
 
@@ -2236,11 +2238,17 @@ macro_rules! uint_impl {
     }
 }
 
+#[cfg(stage0)]
+unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz(x) }
+#[cfg(not(stage0))]
+unsafe fn ctlz_nonzero<T>(x: T) -> T { intrinsics::ctlz_nonzero(x) }
+
 #[lang = "u8"]
 impl u8 {
     uint_impl! { u8, u8, 8,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2253,6 +2261,7 @@ impl u16 {
     uint_impl! { u16, u16, 16,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2265,6 +2274,7 @@ impl u32 {
     uint_impl! { u32, u32, 32,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2277,6 +2287,7 @@ impl u64 {
     uint_impl! { u64, u64, 64,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2289,6 +2300,7 @@ impl u128 {
     uint_impl! { u128, u128, 128,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2302,6 +2314,7 @@ impl usize {
     uint_impl! { usize, u16, 16,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2314,6 +2327,7 @@ impl usize {
     uint_impl! { usize, u32, 32,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,
@@ -2327,6 +2341,7 @@ impl usize {
     uint_impl! { usize, u64, 64,
         intrinsics::ctpop,
         intrinsics::ctlz,
+        ctlz_nonzero,
         intrinsics::cttz,
         intrinsics::bswap,
         intrinsics::add_with_overflow,