about summary refs log tree commit diff
path: root/src/test/codegen
diff options
context:
space:
mode:
author: Nicholas Nethercote <n.nethercote@gmail.com> 2022-10-20 18:59:07 +1100
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2022-10-31 10:16:39 +1100
commit: 003a3f8cd3ea4594e2c2bc89c05562a202868ce0 (patch)
tree: eded7bafa2d9a51eff572a3510d175c3a494191b /src/test/codegen
parent: f42b6fa7cad0d221b0c5407dca70b085784b1b5e (diff)
download: rust-003a3f8cd3ea4594e2c2bc89c05562a202868ce0.tar.gz
          rust-003a3f8cd3ea4594e2c2bc89c05562a202868ce0.zip
Use `br` instead of `switch` in more cases.
`codegen_switchint_terminator` already uses `br` instead of `switch`
when there is one normal target plus the `otherwise` target. But there's
another common case with two normal targets and an `otherwise` target
that points to an empty unreachable BB. This comes up a lot when
switching on the tags of enums that use niches.

The pattern looks like this:
```
bb1:                                              ; preds = %bb6
  %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4
  %4 = sub i8 %3, 2
  %5 = icmp eq i8 %4, 0
  %_6 = select i1 %5, i64 0, i64 1
  switch i64 %_6, label %bb3 [
    i64 0, label %bb4
    i64 1, label %bb2
  ]

bb3:                                              ; preds = %bb1
  unreachable
```
This commit adds code to convert the `switch` to a `br`:
```
bb1:                                              ; preds = %bb6
  %3 = load i8, ptr %_2, align 1, !range !9, !noundef !4
  %4 = sub i8 %3, 2
  %5 = icmp eq i8 %4, 0
  %_6 = select i1 %5, i64 0, i64 1
  %6 = icmp eq i64 %_6, 0
  br i1 %6, label %bb4, label %bb2

bb3:                                              ; No predecessors!
  unreachable
```
This has a surprisingly large effect on compile times, with reductions
of 5% on debug builds of some crates. The reduction is all due to LLVM
taking less time. Maybe LLVM is just much better at handling `br` than
`switch`.

The resulting code is still suboptimal.
- The `icmp`, `select`, `icmp` sequence is silly, converting an `i1` to an `i64`
  and back to an `i1`. But with the current code structure it's hard to avoid,
  and LLVM will easily clean it up, in opt builds at least.
- `bb3` is usually now truly dead code (though not always, so it can't
  be removed universally).
Diffstat (limited to 'src/test/codegen')
-rw-r--r-- src/test/codegen/match-optimized.rs | 60
-rw-r--r-- src/test/codegen/match-unoptimized.rs | 23
-rw-r--r-- src/test/codegen/match.rs | 29
3 files changed, 83 insertions, 29 deletions
diff --git a/src/test/codegen/match-optimized.rs b/src/test/codegen/match-optimized.rs
new file mode 100644
index 00000000000..36402cc7353
--- /dev/null
+++ b/src/test/codegen/match-optimized.rs
@@ -0,0 +1,60 @@
+// compile-flags: -C no-prepopulate-passes -O
+
+#![crate_type = "lib"]
+
+pub enum E {
+    A,
+    B,
+    C,
+}
+
+// CHECK-LABEL: @exhaustive_match
+#[no_mangle]
+pub fn exhaustive_match(e: E) -> u8 {
+// CHECK: switch{{.*}}, label %[[OTHERWISE:[a-zA-Z0-9_]+]] [
+// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[A:[a-zA-Z0-9_]+]]
+// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[B:[a-zA-Z0-9_]+]]
+// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[C:[a-zA-Z0-9_]+]]
+// CHECK-NEXT: ]
+// CHECK: [[OTHERWISE]]:
+// CHECK-NEXT: unreachable
+//
+// CHECK: [[A]]:
+// CHECK-NEXT: store i8 0, {{i8\*|ptr}} %1, align 1
+// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]]
+// CHECK: [[B]]:
+// CHECK-NEXT: store i8 1, {{i8\*|ptr}} %1, align 1
+// CHECK-NEXT: br label %[[EXIT]]
+// CHECK: [[C]]:
+// CHECK-NEXT: store i8 2, {{i8\*|ptr}} %1, align 1
+// CHECK-NEXT: br label %[[EXIT]]
+    match e {
+        E::A => 0,
+        E::B => 1,
+        E::C => 2,
+    }
+}
+
+#[repr(u16)]
+pub enum E2 {
+    A = 13,
+    B = 42,
+}
+
+// For optimized code we produce a switch with an unreachable target as the `otherwise` so LLVM
+// knows the possible values. Compare with `src/test/codegen/match-unoptimized.rs`.
+
+// CHECK-LABEL: @exhaustive_match_2
+#[no_mangle]
+pub fn exhaustive_match_2(e: E2) -> u8 {
+    // CHECK: switch i16 %{{.+}}, label %[[UNREACH:.+]] [
+    // CHECK-NEXT: i16 13,
+    // CHECK-NEXT: i16 42,
+    // CHECK-NEXT: ]
+    // CHECK: [[UNREACH]]:
+    // CHECK-NEXT: unreachable
+    match e {
+        E2::A => 0,
+        E2::B => 1,
+    }
+}
diff --git a/src/test/codegen/match-unoptimized.rs b/src/test/codegen/match-unoptimized.rs
new file mode 100644
index 00000000000..be40b29e3d3
--- /dev/null
+++ b/src/test/codegen/match-unoptimized.rs
@@ -0,0 +1,23 @@
+// compile-flags: -C no-prepopulate-passes -Copt-level=0
+
+#![crate_type = "lib"]
+
+#[repr(u16)]
+pub enum E2 {
+    A = 13,
+    B = 42,
+}
+
+// For unoptimized code we produce a `br` instead of a `switch`. Compare with
+// `src/test/codegen/match-optimized.rs`
+
+// CHECK-LABEL: @exhaustive_match_2
+#[no_mangle]
+pub fn exhaustive_match_2(e: E2) -> u8 {
+    // CHECK: %[[CMP:.+]] = icmp eq i16 %{{.+}}, 13
+    // CHECK-NEXT: br i1 %[[CMP:.+]],
+    match e {
+        E2::A => 0,
+        E2::B => 1,
+    }
+}
diff --git a/src/test/codegen/match.rs b/src/test/codegen/match.rs
deleted file mode 100644
index b203641fddb..00000000000
--- a/src/test/codegen/match.rs
+++ /dev/null
@@ -1,29 +0,0 @@
-// compile-flags: -C no-prepopulate-passes
-
-#![crate_type = "lib"]
-
-pub enum E {
-    A,
-    B,
-}
-
-// CHECK-LABEL: @exhaustive_match
-#[no_mangle]
-pub fn exhaustive_match(e: E) -> u8 {
-// CHECK: switch{{.*}}, label %[[OTHERWISE:[a-zA-Z0-9_]+]] [
-// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[A:[a-zA-Z0-9_]+]]
-// CHECK-NEXT: i[[TY:[0-9]+]] [[DISCR:[0-9]+]], label %[[B:[a-zA-Z0-9_]+]]
-// CHECK-NEXT: ]
-// CHECK: [[OTHERWISE]]:
-// CHECK-NEXT: unreachable
-// CHECK: [[A]]:
-// CHECK-NEXT: store i8 0, {{i8\*|ptr}} %1, align 1
-// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]]
-// CHECK: [[B]]:
-// CHECK-NEXT: store i8 1, {{i8\*|ptr}} %1, align 1
-// CHECK-NEXT: br label %[[EXIT:[a-zA-Z0-9_]+]]
-    match e {
-        E::A => 0,
-        E::B => 1,
-    }
-}