about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMazdak Farrokhzad <twingoow@gmail.com>2019-12-11 10:10:41 +0100
committerGitHub <noreply@github.com>2019-12-11 10:10:41 +0100
commit830b4ee76adea7577615c7a6950e14c22cf0fb20 (patch)
treeb2c6c494190484a96c79a0ad042e0a76ea4f7e8f
parentddca1e09c36a6ce21d95fec1619f23ba59b69c8a (diff)
parent1f07aa582a41e6fc253139909d3bf9bfd04a9d6d (diff)
downloadrust-830b4ee76adea7577615c7a6950e14c22cf0fb20.tar.gz
rust-830b4ee76adea7577615c7a6950e14c22cf0fb20.zip
Rollup merge of #66881 - krishna-veerareddy:issue-66780-bool-ord-optimization, r=sfackler
Optimize Ord trait implementation for bool

Casting the booleans to `i8`s and converting their difference into `Ordering` generates better assembly than casting them to `u8`s and comparing them.

Fixes #66780

#### Comparison([Godbolt link](https://rust.godbolt.org/z/PjBpvF))

##### Old assembly:
```asm
example::boolean_cmp:
        mov     ecx, edi
        xor     ecx, esi
        test    esi, esi
        mov     eax, 255
        cmove   eax, ecx
        test    edi, edi
        cmovne  eax, ecx
        ret
```

##### New assembly:
```asm
example::boolean_cmp:
        mov     eax, edi
        sub     al, sil
        ret
```

##### Old LLVM-MCA statistics:
```
Iterations:        100
Instructions:      800
Total Cycles:      234
Total uOps:        1000

Dispatch Width:    6
uOps Per Cycle:    4.27
IPC:               3.42
Block RThroughput: 1.7
```

##### New LLVM-MCA statistics:
```
Iterations:        100
Instructions:      300
Total Cycles:      110
Total uOps:        500

Dispatch Width:    6
uOps Per Cycle:    4.55
IPC:               2.73
Block RThroughput: 1.0
```
-rw-r--r--src/libcore/cmp.rs12
-rw-r--r--src/libcore/tests/cmp.rs8
-rw-r--r--src/test/codegen/bool-cmp.rs17
3 files changed, 36 insertions, 1 deletions
diff --git a/src/libcore/cmp.rs b/src/libcore/cmp.rs
index a5f355cd9a7..fd4be02e20f 100644
--- a/src/libcore/cmp.rs
+++ b/src/libcore/cmp.rs
@@ -1005,6 +1005,7 @@ pub fn max_by_key<T, F: FnMut(&T) -> K, K: Ord>(v1: T, v2: T, mut f: F) -> T {
 
 // Implementation of PartialEq, Eq, PartialOrd and Ord for primitive types
 mod impls {
+    use crate::hint::unreachable_unchecked;
     use crate::cmp::Ordering::{self, Less, Greater, Equal};
 
     macro_rules! partial_eq_impl {
@@ -1125,7 +1126,16 @@ mod impls {
     impl Ord for bool {
         #[inline]
         fn cmp(&self, other: &bool) -> Ordering {
-            (*self as u8).cmp(&(*other as u8))
+            // Casting to i8's and converting the difference to an Ordering generates
+            // more optimal assembly.
+            // See <https://github.com/rust-lang/rust/issues/66780> for more info.
+            match (*self as i8) - (*other as i8) {
+                -1 => Less,
+                0 => Equal,
+                1 => Greater,
+                // SAFETY: bool as i8 returns 0 or 1, so the difference can't be anything else
+                _ => unsafe { unreachable_unchecked() },
+            }
         }
     }
 
diff --git a/src/libcore/tests/cmp.rs b/src/libcore/tests/cmp.rs
index 5e6778e222a..56a2f4acf6e 100644
--- a/src/libcore/tests/cmp.rs
+++ b/src/libcore/tests/cmp.rs
@@ -10,6 +10,14 @@ fn test_int_totalord() {
 }
 
 #[test]
+fn test_bool_totalord() {
+    assert_eq!(true.cmp(&false), Greater);
+    assert_eq!(false.cmp(&true), Less);
+    assert_eq!(true.cmp(&true), Equal);
+    assert_eq!(false.cmp(&false), Equal);
+}
+
+#[test]
 fn test_mut_int_totalord() {
     assert_eq!((&mut 5).cmp(&&mut 10), Less);
     assert_eq!((&mut 10).cmp(&&mut 5), Greater);
diff --git a/src/test/codegen/bool-cmp.rs b/src/test/codegen/bool-cmp.rs
new file mode 100644
index 00000000000..8769a4cb5e1
--- /dev/null
+++ b/src/test/codegen/bool-cmp.rs
@@ -0,0 +1,17 @@
+// This is a test for optimal Ord trait implementation for bool.
+// See <https://github.com/rust-lang/rust/issues/66780> for more info.
+
+// compile-flags: -C opt-level=3
+
+#![crate_type = "lib"]
+
+use std::cmp::Ordering;
+
+// CHECK-LABEL: @cmp_bool
+#[no_mangle]
+pub fn cmp_bool(a: bool, b: bool) -> Ordering {
+// CHECK: zext i1
+// CHECK: zext i1
+// CHECK: sub nsw
+    a.cmp(&b)
+}