// Codegen test: aggregating by-value arrays into SIMD wrapper types.
//@ compile-flags: -C opt-level=3 -C no-prepopulate-passes
//@ only-64bit
#![feature(core_intrinsics, repr_simd)]
#![no_std]
#![crate_type = "lib"]
use core::intrinsics::simd::{simd_add, simd_extract};
/// Generic `N`-element vector of `T`, declared `#[repr(simd)]` so that values of
/// this type can be handed to the `simd_add` / `simd_extract` intrinsics used by
/// the tests in this file.
#[repr(simd)]
#[derive(Clone, Copy)]
pub struct Simd<T, const N: usize>([T; N]);
/// Generic `N`-element vector of `T` that is marked `packed` in addition to
/// `#[repr(simd)]`.
// NOTE(review): `packed` presumably removes the vector over-alignment/padding so
// the type is laid out like the bare `[T; N]` array — confirm against the
// `repr_simd` feature documentation.
#[repr(simd, packed)]
#[derive(Clone, Copy)]
pub struct PackedSimd<T, const N: usize>([T; N]);
/// Single-field `#[repr(transparent)]` wrapper. The tests use it to verify that
/// wrapping a SIMD value needs no extra processing or spilling.
#[repr(transparent)]
pub struct Transparent<T>(T);
// These tests don't actually care about the add/extract themselves; those
// operations are only there to ensure the aggregated temporaries are used
// exclusively in potentially-SSA ways.
/// Wraps two by-value `[u32; 4]` arrays in `Simd`, adds them with `simd_add`, and
/// returns the element at index 1 of the sum.
///
/// The FileCheck directives in the body expect each array to be loaded directly
/// from its argument pointer as a `<4 x i32>` (4-byte aligned) and the two loaded
/// values to be fed to a vector `add`.
#[no_mangle]
pub fn simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 {
// CHECK-LABEL: simd_aggregate_pot
// CHECK: %a = load <4 x i32>, ptr %x, align 4
// CHECK: %b = load <4 x i32>, ptr %y, align 4
// CHECK: add <4 x i32> %a, %b
// NOTE(review): the `%a` / `%b` value names in the directives above appear to
// come from the local bindings below — avoid renaming them.
// SAFETY: both operands have the same `#[repr(simd)]` type, and index 1 is in
// bounds for a 4-element vector.
unsafe {
let a = Simd(x);
let b = Simd(y);
let c = simd_add(a, b);
simd_extract(c, 1)
}
}
/// Non-power-of-two variant: wraps two by-value `[u32; 7]` arrays in `Simd`, adds
/// them with `simd_add`, and returns the element at index 1 of the sum.
///
/// The FileCheck directives in the body expect each array to be loaded directly
/// from its argument pointer as a `<7 x i32>` (4-byte aligned) and the two loaded
/// values to be fed to a vector `add`.
#[no_mangle]
pub fn simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 {
// CHECK-LABEL: simd_aggregate_npot
// CHECK: %a = load <7 x i32>, ptr %x, align 4
// CHECK: %b = load <7 x i32>, ptr %y, align 4
// CHECK: add <7 x i32> %a, %b
// NOTE(review): the `%a` / `%b` value names in the directives above appear to
// come from the local bindings below — avoid renaming them.
// SAFETY: both operands have the same `#[repr(simd)]` type, and index 1 is in
// bounds for a 7-element vector.
unsafe {
let a = Simd(x);
let b = Simd(y);
let c = simd_add(a, b);
simd_extract(c, 1)
}
}
/// Packed variant: wraps two by-value `[u32; 4]` arrays in `PackedSimd`, adds
/// them with `simd_add`, and returns the element at index 1 of the sum.
///
/// The FileCheck directives in the body expect each array to be loaded directly
/// from its argument pointer as a `<4 x i32>` (4-byte aligned) and the two loaded
/// values to be fed to a vector `add`.
#[no_mangle]
pub fn packed_simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 {
// CHECK-LABEL: packed_simd_aggregate_pot
// CHECK: %a = load <4 x i32>, ptr %x, align 4
// CHECK: %b = load <4 x i32>, ptr %y, align 4
// CHECK: add <4 x i32> %a, %b
// NOTE(review): the `%a` / `%b` value names in the directives above appear to
// come from the local bindings below — avoid renaming them.
// SAFETY: both operands have the same `#[repr(simd, packed)]` type, and index 1
// is in bounds for a 4-element vector.
unsafe {
let a = PackedSimd(x);
let b = PackedSimd(y);
let c = simd_add(a, b);
simd_extract(c, 1)
}
}
/// Packed, non-power-of-two variant: wraps two by-value `[u32; 7]` arrays in
/// `PackedSimd`, adds them with `simd_add`, and returns the element at index 1 of
/// the sum.
///
/// Unlike the other aggregate tests in this file, the FileCheck directives here
/// expect two 28-byte stack slots (`%b` declared before `%a`), a `memcpy` of each
/// argument into its slot, and only then the `<7 x i32>` loads and vector `add`.
// NOTE(review): presumably the 28-byte packed layout cannot be treated as an
// immediate `<7 x i32>` value, hence the copy through memory — confirm.
#[no_mangle]
pub fn packed_simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 {
// CHECK-LABEL: packed_simd_aggregate_npot
// CHECK: %b = alloca [28 x i8], align 4
// CHECK: %a = alloca [28 x i8], align 4
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %x, i64 28, i1 false)
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %b, ptr align 4 %y, i64 28, i1 false)
// CHECK: %[[TEMPA:.+]] = load <7 x i32>, ptr %a, align 4
// CHECK: %[[TEMPB:.+]] = load <7 x i32>, ptr %b, align 4
// CHECK: add <7 x i32> %[[TEMPA]], %[[TEMPB]]
// NOTE(review): the `%a` / `%b` slot names in the directives above appear to
// come from the local bindings below — avoid renaming them.
// SAFETY: both operands have the same `#[repr(simd, packed)]` type, and index 1
// is in bounds for a 7-element vector.
unsafe {
let a = PackedSimd(x);
let b = PackedSimd(y);
let c = simd_add(a, b);
simd_extract(c, 1)
}
}
/// Wraps a by-value `[u32; 4]` in `Simd`, wraps that in `Transparent`, and returns
/// the element at index 1 of the inner vector.
///
/// The FileCheck directives in the body expect a single 4-byte stack slot
/// (captured as `RET`) with no other stack slot before it or between it and the
/// `<4 x i32>` load of `x`; the extracted element is then stored into that slot.
#[no_mangle]
pub fn transparent_simd_aggregate(x: [u32; 4]) -> u32 {
// The transparent wrapper can just use the same SSA value as its field.
// No extra processing or spilling needed.
// CHECK-LABEL: transparent_simd_aggregate
// CHECK-NOT: alloca
// CHECK: %[[RET:.+]] = alloca [4 x i8]
// CHECK-NOT: alloca
// CHECK: %a = load <4 x i32>, ptr %x, align 4
// CHECK: %[[TEMP:.+]] = extractelement <4 x i32> %a, i32 1
// CHECK: store i32 %[[TEMP]], ptr %[[RET]]
// NOTE(review): the `%a` value name in the directives above appears to come
// from the local binding below — avoid renaming it.
// SAFETY: `b.0` is a `#[repr(simd)]` value, and index 1 is in bounds for a
// 4-element vector.
unsafe {
let a = Simd(x);
let b = Transparent(a);
simd_extract(b.0, 1)
}
}
|