From b5376ba6017fa04a13afda6ac52587f06a6c0bd8 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Fri, 29 Mar 2024 00:00:24 -0700 Subject: Remove my `scalar_copy_backend_type` optimization attempt I added this back in 111999, but I no longer think it's a good idea - It had to get scaled back to only power-of-two things to not break a bunch of targets - LLVM seems to be getting better at memcpy removal anyway - Introducing vector instructions has seemed to sometimes (115515) make autovectorization worse So this removes it from the codegen crates entirely, and instead just tries to use instead of direct `memcpy` so things will still use load/store for immediates. --- tests/codegen/array-codegen.rs | 44 ++++++++++++++++++-------------- tests/codegen/array-optimized.rs | 8 +++--- tests/codegen/mem-replace-simple-type.rs | 6 ++--- 3 files changed, 31 insertions(+), 27 deletions(-) (limited to 'tests/codegen') diff --git a/tests/codegen/array-codegen.rs b/tests/codegen/array-codegen.rs index bb4bd5444db..1310e61c41d 100644 --- a/tests/codegen/array-codegen.rs +++ b/tests/codegen/array-codegen.rs @@ -5,52 +5,58 @@ // CHECK-LABEL: @array_load #[no_mangle] pub fn array_load(a: &[u8; 4]) -> [u8; 4] { - // CHECK: %_0 = alloca [4 x i8], align 1 - // CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1 - // CHECK: store <4 x i8> %[[TEMP1]], ptr %_0, align 1 - // CHECK: %[[TEMP2:.+]] = load i32, ptr %_0, align 1 - // CHECK: ret i32 %[[TEMP2]] + // CHECK-NOT: alloca + // CHECK: %[[ALLOCA:.+]] = alloca [4 x i8], align 1 + // CHECK-NOT: alloca + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[ALLOCA]], ptr align 1 %a, {{.+}} 4, i1 false) + // CHECK: %[[TEMP:.+]] = load i32, ptr %[[ALLOCA]], align 1 + // CHECK: ret i32 %[[TEMP]] *a } // CHECK-LABEL: @array_store #[no_mangle] pub fn array_store(a: [u8; 4], p: &mut [u8; 4]) { + // CHECK-NOT: alloca + // CHECK: %[[TEMP:.+]] = alloca i32, [[TEMPALIGN:align [0-9]+]] + // CHECK-NOT: alloca // CHECK: %a = alloca [4 x i8] - // CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1 - // CHECK-NEXT: store <4 x i8> %[[TEMP]], ptr %p, align 1 + // CHECK-NOT: alloca + // store i32 %0, ptr %[[TEMP]] + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %a, ptr [[TEMPALIGN]] %[[TEMP]], {{.+}} 4, i1 false) + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %a, {{.+}} 4, i1 false) *p = a; } // CHECK-LABEL: @array_copy #[no_mangle] pub fn array_copy(a: &[u8; 4], p: &mut [u8; 4]) { + // CHECK-NOT: alloca // CHECK: %[[LOCAL:.+]] = alloca [4 x i8], align 1 - // CHECK: %[[TEMP1:.+]] = load <4 x i8>, ptr %a, align 1 - // CHECK: store <4 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1 - // CHECK: %[[TEMP2:.+]] = load <4 x i8>, ptr %[[LOCAL]], align 1 - // CHECK: store <4 x i8> %[[TEMP2]], ptr %p, align 1 + // CHECK-NOT: alloca + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 4, i1 false) + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 4, i1 false) *p = *a; } // CHECK-LABEL: @array_copy_1_element #[no_mangle] pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) { + // CHECK-NOT: alloca // CHECK: %[[LOCAL:.+]] = alloca [1 x i8], align 1 - // CHECK: %[[TEMP1:.+]] = load i8, ptr %a, align 1 - // CHECK: store i8 %[[TEMP1]], ptr %[[LOCAL]], align 1 - // CHECK: %[[TEMP2:.+]] = load i8, ptr %[[LOCAL]], align 1 - // CHECK: store i8 %[[TEMP2]], ptr %p, align 1 + // CHECK-NOT: alloca + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 1, i1 false) + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 1, i1 false) *p = *a; } // CHECK-LABEL: @array_copy_2_elements #[no_mangle] pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { + // CHECK-NOT: alloca // CHECK: %[[LOCAL:.+]] = alloca [2 x i8], align 1 - // CHECK: %[[TEMP1:.+]] = load <2 x i8>, ptr %a, align 1 - // CHECK: store <2 x i8> %[[TEMP1]], ptr %[[LOCAL]], align 1 - // CHECK: %[[TEMP2:.+]] = load <2 x i8>, ptr %[[LOCAL]], align 1 - // CHECK: store <2 x i8> %[[TEMP2]], ptr %p, align 1 + // CHECK-NOT: alloca + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %[[LOCAL]], ptr align 1 %a, {{.+}} 2, i1 false) + // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %p, ptr align 1 %[[LOCAL]], {{.+}} 2, i1 false) *p = *a; } diff --git a/tests/codegen/array-optimized.rs b/tests/codegen/array-optimized.rs index 4cf16f1fb30..42fdbd39b7e 100644 --- a/tests/codegen/array-optimized.rs +++ b/tests/codegen/array-optimized.rs @@ -16,8 +16,8 @@ pub fn array_copy_1_element(a: &[u8; 1], p: &mut [u8; 1]) { #[no_mangle] pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { // CHECK-NOT: alloca - // CHECK: %[[TEMP:.+]] = load <2 x i8>, ptr %a, align 1 - // CHECK: store <2 x i8> %[[TEMP]], ptr %p, align 1 + // CHECK: %[[TEMP:.+]] = load i16, ptr %a, align 1 + // CHECK: store i16 %[[TEMP]], ptr %p, align 1 // CHECK: ret *p = *a; } @@ -26,8 +26,8 @@ pub fn array_copy_2_elements(a: &[u8; 2], p: &mut [u8; 2]) { #[no_mangle] pub fn array_copy_4_elements(a: &[u8; 4], p: &mut [u8; 4]) { // CHECK-NOT: alloca - // CHECK: %[[TEMP:.+]] = load <4 x i8>, ptr %a, align 1 - // CHECK: store <4 x i8> %[[TEMP]], ptr %p, align 1 + // CHECK: %[[TEMP:.+]] = load i32, ptr %a, align 1 + // CHECK: store i32 %[[TEMP]], ptr %p, align 1 // CHECK: ret *p = *a; } diff --git a/tests/codegen/mem-replace-simple-type.rs b/tests/codegen/mem-replace-simple-type.rs index b00fbad05d9..50b43f5854a 100644 --- a/tests/codegen/mem-replace-simple-type.rs +++ b/tests/codegen/mem-replace-simple-type.rs @@ -45,9 +45,7 @@ pub fn replace_short_array_3(r: &mut [u32; 3], v: [u32; 3]) -> [u32; 3] { // CHECK-LABEL: @replace_short_array_4( pub fn replace_short_array_4(r: &mut [u32; 4], v: [u32; 4]) -> [u32; 4] { // CHECK-NOT: alloca - // CHECK: %[[R:.+]] = load <4 x i32>, ptr %r, align 4 - // CHECK: store <4 x i32> %[[R]], ptr %result - // CHECK: %[[V:.+]] = load <4 x i32>, ptr %v, align 4 - // CHECK: store <4 x i32> %[[V]], ptr %r + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %result, ptr align 4 %r, i64 16, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %r, ptr align 4 %v, i64 16, i1 false) std::mem::replace(r, v) } -- cgit 1.4.1-3-g733a5 From 593e900ad25f1b21cc218ea8bcce3c5e3e94ceac Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Tue, 9 Apr 2024 13:41:35 -0700 Subject: Update 122805 test for PR 123185 --- tests/codegen/issues/issue-122805.rs | 39 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'tests/codegen') diff --git a/tests/codegen/issues/issue-122805.rs b/tests/codegen/issues/issue-122805.rs index 6a5be39238f..6d108ada6dd 100644 --- a/tests/codegen/issues/issue-122805.rs +++ b/tests/codegen/issues/issue-122805.rs @@ -1,4 +1,11 @@ -//@ compile-flags: -O +//@ revisions: OPT2 OPT3WINX64 OPT3LINX64 +//@ [OPT2] compile-flags: -O +//@ [OPT3LINX64] compile-flags: -C opt-level=3 +//@ [OPT3WINX64] compile-flags: -C opt-level=3 +//@ [OPT3LINX64] only-linux +//@ [OPT3WINX64] only-windows +//@ [OPT3LINX64] only-x86_64 +//@ [OPT3WINX64] only-x86_64 //@ min-llvm-version: 18.1.3 #![crate_type = "lib"] @@ -9,15 +16,27 @@ // to avoid complicating the code. // CHECK-LABEL: define{{.*}}void @convert( // CHECK-NOT: shufflevector -// CHECK: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: insertelement <8 x i16> -// CHECK-NEXT: store <8 x i16> +// OPT2: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 2 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 4 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 6 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 8 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 10 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 12 +// OPT2-NEXT: store i16 +// OPT2-NEXT: getelementptr inbounds i8, {{.+}} 14 +// OPT2-NEXT: store i16 +// OPT3LINX64: load <8 x i16> +// OPT3LINX64-NEXT: call <8 x i16> @llvm.bswap +// OPT3LINX64-NEXT: store <8 x i16> +// OPT3WINX64: load <8 x i16> +// OPT3WINX64-NEXT: call <8 x i16> @llvm.bswap +// OPT3WINX64-NEXT: store <8 x i16> // CHECK-NEXT: ret void #[no_mangle] #[cfg(target_endian = "little")] -- cgit 1.4.1-3-g733a5