about summary refs log tree commit diff
diff options
context:
space:
mode:
author: DaniPopes <57450786+DaniPopes@users.noreply.github.com> 2024-12-06 23:16:06 +0100
committer: DaniPopes <57450786+DaniPopes@users.noreply.github.com> 2025-03-06 22:29:05 +0800
commit: 58c10c66c1d5ec2e47e35a4ff31f3758448f20f4 (patch)
tree: 0bb8c183d731a26c214bbd1b584b34c8cbaee6ff
parent: 002da76821d32c8807dc47da16660925d8cc9b62 (diff)
download: rust-58c10c66c1d5ec2e47e35a4ff31f3758448f20f4.tar.gz
          rust-58c10c66c1d5ec2e47e35a4ff31f3758448f20f4.zip
Lower BinOp::Cmp to llvm.{s,u}cmp.* intrinsics
Lowers `mir::BinOp::Cmp` (`three_way_compare` intrinsic) to the corresponding
LLVM `llvm.{s,u}cmp.i8.*` intrinsics, added in LLVM 19.
-rw-r--r-- compiler/rustc_codegen_llvm/src/builder.rs | 30
-rw-r--r-- compiler/rustc_codegen_llvm/src/context.rs | 12
-rw-r--r-- compiler/rustc_codegen_ssa/src/mir/rvalue.rs | 3
-rw-r--r-- compiler/rustc_codegen_ssa/src/traits/builder.rs | 12
-rw-r--r-- tests/assembly/x86_64-cmp.rs | 46
-rw-r--r-- tests/codegen/comparison-operators-2-tuple.rs | 1
-rw-r--r-- tests/codegen/integer-cmp.rs | 35
-rw-r--r-- tests/codegen/intrinsics/three_way_compare.rs | 27
8 files changed, 124 insertions, 42 deletions
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 3f20350d0ef..04b9ed02aab 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -14,6 +14,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
+use rustc_middle::bug;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
 use rustc_middle::ty::layout::{
     FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers,
@@ -1119,6 +1120,35 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         unsafe { llvm::LLVMBuildFCmp(self.llbuilder, op as c_uint, lhs, rhs, UNNAMED) }
     }
 
+    fn three_way_compare(
+        &mut self,
+        ty: Ty<'tcx>,
+        lhs: Self::Value,
+        rhs: Self::Value,
+    ) -> Option<Self::Value> {
+        // FIXME: See comment on the definition of `three_way_compare`.
+        if crate::llvm_util::get_version() < (20, 0, 0) {
+            return None;
+        }
+
+        let name = match (ty.is_signed(), ty.primitive_size(self.tcx).bits()) {
+            (true, 8) => "llvm.scmp.i8.i8",
+            (true, 16) => "llvm.scmp.i8.i16",
+            (true, 32) => "llvm.scmp.i8.i32",
+            (true, 64) => "llvm.scmp.i8.i64",
+            (true, 128) => "llvm.scmp.i8.i128",
+
+            (false, 8) => "llvm.ucmp.i8.i8",
+            (false, 16) => "llvm.ucmp.i8.i16",
+            (false, 32) => "llvm.ucmp.i8.i32",
+            (false, 64) => "llvm.ucmp.i8.i64",
+            (false, 128) => "llvm.ucmp.i8.i128",
+
+            _ => bug!("three-way compare unsupported for type {ty:?}"),
+        };
+        Some(self.call_intrinsic(name, &[lhs, rhs]))
+    }
+
     /* Miscellaneous instructions */
     fn memcpy(
         &mut self,
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index ed8426ae197..e367cf90eee 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -1108,6 +1108,18 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.usub.sat.i64", fn(t_i64, t_i64) -> t_i64);
         ifn!("llvm.usub.sat.i128", fn(t_i128, t_i128) -> t_i128);
 
+        ifn!("llvm.scmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.scmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.scmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.scmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.scmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
+        ifn!("llvm.ucmp.i8.i8", fn(t_i8, t_i8) -> t_i8);
+        ifn!("llvm.ucmp.i8.i16", fn(t_i16, t_i16) -> t_i8);
+        ifn!("llvm.ucmp.i8.i32", fn(t_i32, t_i32) -> t_i8);
+        ifn!("llvm.ucmp.i8.i64", fn(t_i64, t_i64) -> t_i8);
+        ifn!("llvm.ucmp.i8.i128", fn(t_i128, t_i128) -> t_i8);
+
         ifn!("llvm.lifetime.start.p0i8", fn(t_i64, ptr) -> void);
         ifn!("llvm.lifetime.end.p0i8", fn(t_i64, ptr) -> void);
 
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index d24e48b37a4..992a23442e2 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -1005,6 +1005,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             mir::BinOp::Cmp => {
                 use std::cmp::Ordering;
                 assert!(!is_float);
+                if let Some(value) = bx.three_way_compare(lhs_ty, lhs, rhs) {
+                    return value;
+                }
                 let pred = |op| base::bin_op_to_icmp_predicate(op, is_signed);
                 if bx.cx().tcx().sess.opts.optimize == OptLevel::No {
                     // FIXME: This actually generates tighter assembly, and is a classic trick
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
index 99fd6b6510f..070b680e540 100644
--- a/compiler/rustc_codegen_ssa/src/traits/builder.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -397,6 +397,18 @@ pub trait BuilderMethods<'a, 'tcx>:
     fn icmp(&mut self, op: IntPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
     fn fcmp(&mut self, op: RealPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
 
+    /// Returns `-1` if `lhs < rhs`, `0` if `lhs == rhs`, and `1` if `lhs > rhs`.
+    // FIXME: Move the default implementation from `codegen_scalar_binop` into this method and
+    // remove the `Option` return once LLVM 20 is the minimum version.
+    fn three_way_compare(
+        &mut self,
+        _ty: Ty<'tcx>,
+        _lhs: Self::Value,
+        _rhs: Self::Value,
+    ) -> Option<Self::Value> {
+        None
+    }
+
     fn memcpy(
         &mut self,
         dst: Self::Value,
diff --git a/tests/assembly/x86_64-cmp.rs b/tests/assembly/x86_64-cmp.rs
index 8cccab7d40d..26c9013d96f 100644
--- a/tests/assembly/x86_64-cmp.rs
+++ b/tests/assembly/x86_64-cmp.rs
@@ -1,5 +1,8 @@
-//@ revisions: DEBUG LLVM-PRE-20-OPTIM LLVM-20-OPTIM
-//@ [DEBUG] compile-flags: -C opt-level=0
+//@ revisions: LLVM-PRE-20-DEBUG LLVM-20-DEBUG LLVM-PRE-20-OPTIM LLVM-20-OPTIM
+//@ [LLVM-PRE-20-DEBUG] compile-flags: -C opt-level=0
+//@ [LLVM-PRE-20-DEBUG] max-llvm-major-version: 19
+//@ [LLVM-20-DEBUG] compile-flags: -C opt-level=0
+//@ [LLVM-20-DEBUG] min-llvm-version: 20
 //@ [LLVM-PRE-20-OPTIM] compile-flags: -C opt-level=3
 //@ [LLVM-PRE-20-OPTIM] max-llvm-major-version: 19
 //@ [LLVM-20-OPTIM] compile-flags: -C opt-level=3
@@ -16,13 +19,19 @@ use std::intrinsics::three_way_compare;
 #[no_mangle]
 // CHECK-LABEL: signed_cmp:
 pub fn signed_cmp(a: i16, b: i16) -> std::cmp::Ordering {
-    // DEBUG: cmp
-    // DEBUG: setg
-    // DEBUG: and
-    // DEBUG: cmp
-    // DEBUG: setl
-    // DEBUG: and
-    // DEBUG: sub
+    // LLVM-PRE-20-DEBUG: cmp
+    // LLVM-PRE-20-DEBUG: setg
+    // LLVM-PRE-20-DEBUG: and
+    // LLVM-PRE-20-DEBUG: cmp
+    // LLVM-PRE-20-DEBUG: setl
+    // LLVM-PRE-20-DEBUG: and
+    // LLVM-PRE-20-DEBUG: sub
+    //
+    // LLVM-20-DEBUG: sub
+    // LLVM-20-DEBUG: setl
+    // LLVM-20-DEBUG: setg
+    // LLVM-20-DEBUG: sub
+    // LLVM-20-DEBUG: ret
 
     // LLVM-PRE-20-OPTIM: xor
     // LLVM-PRE-20-OPTIM: cmp
@@ -42,13 +51,18 @@ pub fn signed_cmp(a: i16, b: i16) -> std::cmp::Ordering {
 #[no_mangle]
 // CHECK-LABEL: unsigned_cmp:
 pub fn unsigned_cmp(a: u16, b: u16) -> std::cmp::Ordering {
-    // DEBUG: cmp
-    // DEBUG: seta
-    // DEBUG: and
-    // DEBUG: cmp
-    // DEBUG: setb
-    // DEBUG: and
-    // DEBUG: sub
+    // LLVM-PRE-20-DEBUG: cmp
+    // LLVM-PRE-20-DEBUG: seta
+    // LLVM-PRE-20-DEBUG: and
+    // LLVM-PRE-20-DEBUG: cmp
+    // LLVM-PRE-20-DEBUG: setb
+    // LLVM-PRE-20-DEBUG: and
+    // LLVM-PRE-20-DEBUG: sub
+    //
+    // LLVM-20-DEBUG: sub
+    // LLVM-20-DEBUG: seta
+    // LLVM-20-DEBUG: sbb
+    // LLVM-20-DEBUG: ret
 
     // LLVM-PRE-20-OPTIM: xor
     // LLVM-PRE-20-OPTIM: cmp
diff --git a/tests/codegen/comparison-operators-2-tuple.rs b/tests/codegen/comparison-operators-2-tuple.rs
index 91a99f9b91f..6a7e489c82d 100644
--- a/tests/codegen/comparison-operators-2-tuple.rs
+++ b/tests/codegen/comparison-operators-2-tuple.rs
@@ -1,5 +1,4 @@
 //@ compile-flags: -C opt-level=1 -Z merge-functions=disabled
-//@ only-x86_64
 //@ min-llvm-version: 20
 
 #![crate_type = "lib"]
diff --git a/tests/codegen/integer-cmp.rs b/tests/codegen/integer-cmp.rs
index 9bbf243946d..812fa8e4a42 100644
--- a/tests/codegen/integer-cmp.rs
+++ b/tests/codegen/integer-cmp.rs
@@ -4,7 +4,7 @@
 //@ revisions: llvm-pre-20 llvm-20
 //@ [llvm-20] min-llvm-version: 20
 //@ [llvm-pre-20] max-llvm-major-version: 19
-//@ compile-flags: -C opt-level=3
+//@ compile-flags: -C opt-level=3 -Zmerge-functions=disabled
 
 #![crate_type = "lib"]
 
@@ -13,7 +13,7 @@ use std::cmp::Ordering;
 // CHECK-LABEL: @cmp_signed
 #[no_mangle]
 pub fn cmp_signed(a: i64, b: i64) -> Ordering {
-    // llvm-20: @llvm.scmp.i8.i64
+    // llvm-20: call{{.*}} i8 @llvm.scmp.i8.i64
     // llvm-pre-20: icmp slt
     // llvm-pre-20: icmp ne
     // llvm-pre-20: zext i1
@@ -24,10 +24,39 @@ pub fn cmp_signed(a: i64, b: i64) -> Ordering {
 // CHECK-LABEL: @cmp_unsigned
 #[no_mangle]
 pub fn cmp_unsigned(a: u32, b: u32) -> Ordering {
-    // llvm-20: @llvm.ucmp.i8.i32
+    // llvm-20: call{{.*}} i8 @llvm.ucmp.i8.i32
     // llvm-pre-20: icmp ult
     // llvm-pre-20: icmp ne
     // llvm-pre-20: zext i1
     // llvm-pre-20: select i1
     a.cmp(&b)
 }
+
+// CHECK-LABEL: @cmp_char
+#[no_mangle]
+pub fn cmp_char(a: char, b: char) -> Ordering {
+    // llvm-20: call{{.*}} i8 @llvm.ucmp.i8.i32
+    // llvm-pre-20: icmp ult
+    // llvm-pre-20: icmp ne
+    // llvm-pre-20: zext i1
+    // llvm-pre-20: select i1
+    a.cmp(&b)
+}
+
+// CHECK-LABEL: @cmp_tuple
+#[no_mangle]
+pub fn cmp_tuple(a: (i16, u16), b: (i16, u16)) -> Ordering {
+    // llvm-20-DAG: call{{.*}} i8 @llvm.ucmp.i8.i16
+    // llvm-20-DAG: call{{.*}} i8 @llvm.scmp.i8.i16
+    // llvm-20: ret i8
+    // llvm-pre-20: icmp slt
+    // llvm-pre-20: icmp ne
+    // llvm-pre-20: zext i1
+    // llvm-pre-20: select i1
+    // llvm-pre-20: icmp ult
+    // llvm-pre-20: icmp ne
+    // llvm-pre-20: zext i1
+    // llvm-pre-20: select i1
+    // llvm-pre-20: select i1
+    a.cmp(&b)
+}
diff --git a/tests/codegen/intrinsics/three_way_compare.rs b/tests/codegen/intrinsics/three_way_compare.rs
index 9a476abe891..95fcb636f7c 100644
--- a/tests/codegen/intrinsics/three_way_compare.rs
+++ b/tests/codegen/intrinsics/three_way_compare.rs
@@ -2,6 +2,7 @@
 //@ [DEBUG] compile-flags: -C opt-level=0
 //@ [OPTIM] compile-flags: -C opt-level=3
 //@ compile-flags: -C no-prepopulate-passes
+//@ min-llvm-version: 20
 
 #![crate_type = "lib"]
 #![feature(core_intrinsics)]
@@ -12,17 +13,8 @@ use std::intrinsics::three_way_compare;
 // CHECK-LABEL: @signed_cmp
 // CHECK-SAME: (i16{{.*}} %a, i16{{.*}} %b)
 pub fn signed_cmp(a: i16, b: i16) -> std::cmp::Ordering {
-    // DEBUG: %[[GT:.+]] = icmp sgt i16 %a, %b
-    // DEBUG: %[[ZGT:.+]] = zext i1 %[[GT]] to i8
-    // DEBUG: %[[LT:.+]] = icmp slt i16 %a, %b
-    // DEBUG: %[[ZLT:.+]] = zext i1 %[[LT]] to i8
-    // DEBUG: %[[R:.+]] = sub nsw i8 %[[ZGT]], %[[ZLT]]
-
-    // OPTIM: %[[LT:.+]] = icmp slt i16 %a, %b
-    // OPTIM: %[[NE:.+]] = icmp ne i16 %a, %b
-    // OPTIM: %[[CGE:.+]] = select i1 %[[NE]], i8 1, i8 0
-    // OPTIM: %[[CGEL:.+]] = select i1 %[[LT]], i8 -1, i8 %[[CGE]]
-    // OPTIM: ret i8 %[[CGEL]]
+    // CHECK: %[[CMP:.+]] = call i8 @llvm.scmp.i8.i16(i16 %a, i16 %b)
+    // CHECK-NEXT: ret i8 %[[CMP]]
     three_way_compare(a, b)
 }
 
@@ -30,16 +22,7 @@ pub fn signed_cmp(a: i16, b: i16) -> std::cmp::Ordering {
 // CHECK-LABEL: @unsigned_cmp
 // CHECK-SAME: (i16{{.*}} %a, i16{{.*}} %b)
 pub fn unsigned_cmp(a: u16, b: u16) -> std::cmp::Ordering {
-    // DEBUG: %[[GT:.+]] = icmp ugt i16 %a, %b
-    // DEBUG: %[[ZGT:.+]] = zext i1 %[[GT]] to i8
-    // DEBUG: %[[LT:.+]] = icmp ult i16 %a, %b
-    // DEBUG: %[[ZLT:.+]] = zext i1 %[[LT]] to i8
-    // DEBUG: %[[R:.+]] = sub nsw i8 %[[ZGT]], %[[ZLT]]
-
-    // OPTIM: %[[LT:.+]] = icmp ult i16 %a, %b
-    // OPTIM: %[[NE:.+]] = icmp ne i16 %a, %b
-    // OPTIM: %[[CGE:.+]] = select i1 %[[NE]], i8 1, i8 0
-    // OPTIM: %[[CGEL:.+]] = select i1 %[[LT]], i8 -1, i8 %[[CGE]]
-    // OPTIM: ret i8 %[[CGEL]]
+    // CHECK: %[[CMP:.+]] = call i8 @llvm.ucmp.i8.i16(i16 %a, i16 %b)
+    // CHECK-NEXT: ret i8 %[[CMP]]
     three_way_compare(a, b)
 }