diff options
| author | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2024-01-02 22:18:00 +0100 |
|---|---|---|
| committer | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2024-01-03 20:25:44 +0100 |
| commit | c8f5d35508e062bd2d95e6c03429bfec831db6d3 (patch) | |
| tree | 53760a62156eb31661061ad4dfa1f990454d7a9e /tests/codegen | |
| parent | 0c72b43614d026f573be190f0e912390044ae3fb (diff) | |
| download | rust-c8f5d35508e062bd2d95e6c03429bfec831db6d3.tar.gz rust-c8f5d35508e062bd2d95e6c03429bfec831db6d3.zip | |
Restructure x86 signed pack instructions
This reduces the amount of duplicated code and the chance of bugs.
I validated the new code for correctness against LLVM using the
following script. It found many bugs in the implementation until I was
finally able to get it correct and passing.
```rust
//! Test for x86 pack instructions. Prints deterministic results, use it to compare backends.
use std::arch::x86_64::{self, __m128i, __m256i};
use rand::{rngs::SmallRng, Rng, SeedableRng};
/// Drives the comparison run: a fixed-seed RNG feeds 100 000 rounds of the
/// SSE and AVX pack tests, so the printed output is reproducible across runs.
fn main() {
    // The constant seed keeps the printed output identical from run to run.
    let mut rng = SmallRng::seed_from_u64(123);
    for _round in 0..100_000 {
        // NOTE(review): both helpers are `unsafe fn`; this presumably relies on
        // the host CPU supporting the intrinsics they call — confirm before
        // running on older hardware.
        unsafe {
            sse_test(&mut rng);
            avx_test(&mut rng);
        }
    }
}
/// Exercises the four 128-bit pack intrinsics once each on fresh random
/// operands and prints every result.
///
/// Order: unsigned then signed saturation for 16-bit inputs, followed by
/// unsigned then signed saturation for 32-bit inputs.
///
/// # Safety
///
/// The caller must ensure the CPU supports the intrinsics invoked here
/// (per the `std::arch` documentation, `_mm_packus_epi32` needs SSE4.1).
unsafe fn sse_test(rng: &mut SmallRng) {
    // Operands are drawn left-to-right, exactly as nested call arguments would be.
    let lhs = sse16(rng);
    let rhs = sse16(rng);
    print_sse_8(x86_64::_mm_packus_epi16(lhs, rhs));

    let lhs = sse16(rng);
    let rhs = sse16(rng);
    print_sse_8(x86_64::_mm_packs_epi16(lhs, rhs));

    let lhs = sse32(rng);
    let rhs = sse32(rng);
    print_sse_16(x86_64::_mm_packus_epi32(lhs, rhs));

    let lhs = sse32(rng);
    let rhs = sse32(rng);
    print_sse_16(x86_64::_mm_packs_epi32(lhs, rhs));
}
/// Exercises the four 256-bit pack intrinsics once each on fresh random
/// operands and prints every result.
///
/// The order mirrors `sse_test`: unsigned then signed saturation for 16-bit
/// inputs, followed by unsigned then signed saturation for 32-bit inputs.
///
/// # Safety
///
/// The caller must ensure the CPU supports the intrinsics invoked here
/// (per the `std::arch` documentation, the `_mm256_pack*` family needs AVX2).
unsafe fn avx_test(rng: &mut SmallRng) {
    // Unsigned 16 -> 8 pack. Calling the signed variant on both lines would
    // leave `_mm256_packus_epi16` completely uncovered by this script.
    print_avx_8(x86_64::_mm256_packus_epi16(avx16(rng), avx16(rng)));
    // Signed 16 -> 8 pack.
    print_avx_8(x86_64::_mm256_packs_epi16(avx16(rng), avx16(rng)));
    // Unsigned 32 -> 16 pack.
    print_avx_16(x86_64::_mm256_packus_epi32(avx32(rng), avx32(rng)));
    // Signed 32 -> 16 pack.
    print_avx_16(x86_64::_mm256_packs_epi32(avx32(rng), avx32(rng)));
}
/// Prints a 128-bit vector as sixteen `i8` lanes using `Debug` formatting.
fn print_sse_8(t: __m128i) {
    // SAFETY: `transmute` only compiles when both types have the same size,
    // and every bit pattern is a valid `i8`.
    let ints: [i8; 16] = unsafe { std::mem::transmute(t) };
    println!("{ints:?}");
}
/// Prints a 128-bit vector as eight `i16` lanes using `Debug` formatting.
fn print_sse_16(t: __m128i) {
    // SAFETY: `transmute` only compiles when both types have the same size,
    // and every bit pattern is a valid `i16`.
    let ints: [i16; 8] = unsafe { std::mem::transmute(t) };
    println!("{ints:?}");
}
/// Prints a 256-bit vector as thirty-two `i8` lanes using `Debug` formatting.
fn print_avx_8(t: __m256i) {
    // SAFETY: `transmute` only compiles when both types have the same size,
    // and every bit pattern is a valid `i8`.
    let ints: [i8; 32] = unsafe { std::mem::transmute(t) };
    println!("{ints:?}");
}
/// Prints a 256-bit vector as sixteen `i16` lanes using `Debug` formatting.
fn print_avx_16(t: __m256i) {
    // SAFETY: `transmute` only compiles when both types have the same size,
    // and every bit pattern is a valid `i16`.
    let ints: [i16; 16] = unsafe { std::mem::transmute(t) };
    println!("{ints:?}");
}
/// Builds a 128-bit vector out of eight values drawn in sequence from `i16(rand)`.
fn sse16(rand: &mut SmallRng) -> __m128i {
    // `from_fn` fills the array in index order, so the draws happen in lane order.
    let lanes: [i16; 8] = std::array::from_fn(|_| i16(rand));
    // SAFETY: `transmute` only compiles when source and target sizes match;
    // the vector is treated here as a plain bag of integer bits.
    unsafe { std::mem::transmute::<[i16; 8], __m128i>(lanes) }
}
/// Builds a 128-bit vector out of four values drawn in sequence from `i32(rand)`.
fn sse32(rand: &mut SmallRng) -> __m128i {
    // `from_fn` fills the array in index order, so the draws happen in lane order.
    let lanes: [i32; 4] = std::array::from_fn(|_| i32(rand));
    // SAFETY: `transmute` only compiles when source and target sizes match;
    // the vector is treated here as a plain bag of integer bits.
    unsafe { std::mem::transmute::<[i32; 4], __m128i>(lanes) }
}
/// Builds a 256-bit vector out of sixteen values drawn in sequence from `i16(rand)`.
fn avx16(rand: &mut SmallRng) -> __m256i {
    // `from_fn` fills the array in index order, so the draws happen in lane order.
    let lanes: [i16; 16] = std::array::from_fn(|_| i16(rand));
    // SAFETY: `transmute` only compiles when source and target sizes match;
    // the vector is treated here as a plain bag of integer bits.
    unsafe { std::mem::transmute::<[i16; 16], __m256i>(lanes) }
}
/// Builds a 256-bit vector out of eight values drawn in sequence from `i32(rand)`.
fn avx32(rand: &mut SmallRng) -> __m256i {
    // `from_fn` fills the array in index order, so the draws happen in lane order.
    let lanes: [i32; 8] = std::array::from_fn(|_| i32(rand));
    // SAFETY: `transmute` only compiles when source and target sizes match;
    // the vector is treated here as a plain bag of integer bits.
    unsafe { std::mem::transmute::<[i32; 8], __m256i>(lanes) }
}
/// Draws a random `i16`, choosing by coin flip between a full-range `i16` and
/// a sign-extended `i8`.
///
/// The second branch yields values that always fit in the narrower type, so a
/// mix of in-range and out-of-range inputs is produced.
fn i16(rand: &mut SmallRng) -> i16 {
    match rand.gen::<bool>() {
        true => rand.gen::<i16>(),
        // Lossless widening; equivalent to a sign-extending `as` cast.
        false => i16::from(rand.gen::<i8>()),
    }
}
/// Draws a random `i32`, choosing by coin flip between a full-range `i32` and
/// a sign-extended `i16`.
///
/// The second branch yields values that always fit in the narrower type, so a
/// mix of in-range and out-of-range inputs is produced.
fn i32(rand: &mut SmallRng) -> i32 {
    match rand.gen::<bool>() {
        true => rand.gen::<i32>(),
        // Lossless widening; equivalent to a sign-extending `as` cast.
        false => i32::from(rand.gen::<i16>()),
    }
}
```
Diffstat (limited to 'tests/codegen')
0 files changed, 0 insertions, 0 deletions
