about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2023-10-12 18:45:01 +0000
committer bors <bors@rust-lang.org> 2023-10-12 18:45:01 +0000
commit df4379b4eb5357263f0cf75475953f9b5c48c31f (patch)
tree 5dcabe4de59bc9ca25a492dd04b2d3ae377e61fe
parent 672fad9b860c6f2e2b5ca088d7b426099c7198a5 (diff)
parent f5cdd3e130dce081a72735eeeb0eca283912f48b (diff)
download rust-df4379b4eb5357263f0cf75475953f9b5c48c31f.tar.gz
rust-df4379b4eb5357263f0cf75475953f9b5c48c31f.zip
Auto merge of #116510 - scottmcm:no-1-simd-v2, r=compiler-errors
Copy 1-element arrays as scalars, not vectors

For `[T; 1]` it's silly to copy as `<1 x T>` when we can just copy as `T`.

Inspired by https://github.com/rust-lang/rust/issues/101210#issuecomment-1732470941, which pointed out that `Option<[u8; 1]>` was codegenning worse than `Option<u8>`.

(I'm not sure *why* LLVM doesn't optimize out `<1 x u8>`, but might as well just not emit it in the first place in this codepath.)

---

I think I bit off too much in #116479; let me try just the scalar case first.

r? `@ghost`
-rw-r--r-- compiler/rustc_codegen_llvm/src/type_of.rs 7
-rw-r--r-- tests/assembly/x86_64-array-pair-load-store-merge.rs 20
-rw-r--r-- tests/codegen/array-codegen.rs 22
-rw-r--r-- tests/codegen/array-optimized.rs 33
4 files changed, 81 insertions, 1 deletion
diff --git a/compiler/rustc_codegen_llvm/src/type_of.rs b/compiler/rustc_codegen_llvm/src/type_of.rs
index dcc62d314ff..fd4c9572af2 100644
--- a/compiler/rustc_codegen_llvm/src/type_of.rs
+++ b/compiler/rustc_codegen_llvm/src/type_of.rs
@@ -397,7 +397,12 @@ impl<'tcx> LayoutLlvmExt<'tcx> for TyAndLayout<'tcx> {
             // extracts all the individual values.
 
             let ety = element.llvm_type(cx);
-            return Some(cx.type_vector(ety, *count));
+            if *count == 1 {
+                // Emitting `<1 x T>` would be silly; just use the scalar.
+                return Some(ety);
+            } else {
+                return Some(cx.type_vector(ety, *count));
+            }
         }
 
         // FIXME: The above only handled integer arrays; surely more things
diff --git a/tests/assembly/x86_64-array-pair-load-store-merge.rs b/tests/assembly/x86_64-array-pair-load-store-merge.rs
new file mode 100644
index 00000000000..55e317e91bf
--- /dev/null
+++ b/tests/assembly/x86_64-array-pair-load-store-merge.rs
@@ -0,0 +1,20 @@
+// assembly-output: emit-asm
+// compile-flags: --crate-type=lib -O -C llvm-args=-x86-asm-syntax=intel
+// only-x86_64
+// ignore-sgx
+// ignore-macos (manipulates rsp too)
+
+// Depending on various codegen choices, this might end up copying
+// a `<2 x i8>`, an `i16`, or two `i8`s.
+// Regardless of those choices, make sure the instructions use (2-byte) words.
+
+// CHECK-LABEL: array_copy_2_elements:
+#[no_mangle]
+pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
+    // CHECK-NOT: byte
+    // CHECK-NOT: mov
+    // CHECK: mov{{.+}}, word ptr
+    // CHECK-NEXT: mov word ptr
+    // CHECK-NEXT: ret
+    *p = *a;
+}
diff --git a/tests/codegen/array-codegen.rs b/tests/codegen/array-codegen.rs
index ba0d444f97e..bf5ae74679b 100644
--- a/tests/codegen/array-codegen.rs
+++ b/tests/codegen/array-codegen.rs
@@ -32,3 +32,25 @@ pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) {
     // CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1
     *p = *a;
 }
+
+// CHECK-LABEL: @array_copy_1_element
+#[no_mangle]
+pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
+    // CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1
+    // CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1
+    // CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1
+    // CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1
+    // CHECK: store i8 %[[TEMP2]], ptr %p, align 1
+    *p = *a;
+}
+
+// CHECK-LABEL: @array_copy_2_elements
+#[no_mangle]
+pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
+    // CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1
+    // CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1
+    // CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1
+    // CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1
+    // CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1
+    *p = *a;
+}
diff --git a/tests/codegen/array-optimized.rs b/tests/codegen/array-optimized.rs
new file mode 100644
index 00000000000..27448fdcfad
--- /dev/null
+++ b/tests/codegen/array-optimized.rs
@@ -0,0 +1,33 @@
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @array_copy_1_element
+#[no_mangle]
+pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = load i8, ptr %a, align 1
+    // CHECK: store i8 %[[TEMP]], ptr %p, align 1
+    // CHECK: ret
+    *p = *a;
+}
+
+// CHECK-LABEL: @array_copy_2_elements
+#[no_mangle]
+pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1
+    // CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1
+    // CHECK: ret
+    *p = *a;
+}
+
+// CHECK-LABEL: @array_copy_4_elements
+#[no_mangle]
+pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1
+    // CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1
+    // CHECK: ret
+    *p = *a;
+}