Diffstat (limited to 'tests/codegen')
8 files changed, 266 insertions, 14 deletions
diff --git a/tests/codegen/optimize-attr-1.rs b/tests/codegen/optimize-attr-1.rs
index 3aee44791e0..db6bdcf9a8b 100644
--- a/tests/codegen/optimize-attr-1.rs
+++ b/tests/codegen/optimize-attr-1.rs
@@ -37,11 +37,23 @@ pub fn speed() -> i32 {
     4 + 4
 }
 
+// CHECK-LABEL: define{{.*}}i32 @none
+// CHECK-SAME: [[NONE_ATTRS:#[0-9]+]]
+// SIZE-OPT: alloca
+// SPEED-OPT: alloca
+#[no_mangle]
+#[optimize(none)]
+pub fn none() -> i32 {
+    let arr = [0, 1, 2, 3, 4];
+    arr[4]
+}
+
 // NO-OPT-DAG: attributes [[SIZE_ATTRS]] = {{.*}}minsize{{.*}}optsize{{.*}}
 // SPEED-OPT-DAG: attributes [[SIZE_ATTRS]] = {{.*}}minsize{{.*}}optsize{{.*}}
 // SIZE-OPT-DAG: attributes [[NOTHING_ATTRS]] = {{.*}}optsize{{.*}}
 // SIZE-OPT-DAG: attributes [[SIZE_ATTRS]] = {{.*}}minsize{{.*}}optsize{{.*}}
+// CHECK-DAG: attributes [[NONE_ATTRS]] = {{.*}}noinline{{.*}}optnone{{.*}}
-// SIZE-OPT: attributes [[SPEED_ATTRS]]
+// SIZE-OPT-DAG: attributes [[SPEED_ATTRS]]
 // SIZE-OPT-NOT: minsize
 // SIZE-OPT-NOT: optsize
 
diff --git a/tests/codegen/s390x-simd.rs b/tests/codegen/s390x-simd.rs
new file mode 100644
index 00000000000..23181e6a103
--- /dev/null
+++ b/tests/codegen/s390x-simd.rs
@@ -0,0 +1,143 @@
+//! test that s390x vector types are passed using `PassMode::Direct`
+//! see also https://github.com/rust-lang/rust/issues/135744
+//@ add-core-stubs
+//@ compile-flags: --target s390x-unknown-linux-gnu -O
+//@ needs-llvm-components: systemz
+
+#![crate_type = "rlib"]
+#![feature(no_core, asm_experimental_arch)]
+#![feature(s390x_target_feature, simd_ffi, link_llvm_intrinsics, repr_simd)]
+#![no_core]
+
+extern crate minicore;
+use minicore::*;
+
+#[repr(simd)]
+struct i8x16([i8; 16]);
+
+#[repr(simd)]
+struct i16x8([i16; 8]);
+
+#[repr(simd)]
+struct i32x4([i32; 4]);
+
+#[repr(simd)]
+struct i64x2([i64; 2]);
+
+#[repr(simd)]
+struct f32x4([f32; 4]);
+
+#[repr(simd)]
+struct f64x2([f64; 2]);
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.smax.v16i8"]
+    fn vmxb(a: i8x16, b: i8x16) -> i8x16;
+    #[link_name = "llvm.smax.v8i16"]
+    fn vmxh(a: i16x8, b: i16x8) -> i16x8;
+    #[link_name = "llvm.smax.v4i32"]
+    fn vmxf(a: i32x4, b: i32x4) -> i32x4;
+    #[link_name = "llvm.smax.v2i64"]
+    fn vmxg(a: i64x2, b: i64x2) -> i64x2;
+}
+
+// CHECK-LABEL: define <16 x i8> @max_i8x16
+// CHECK-SAME: <16 x i8> %a, <16 x i8> %b
+// CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_i8x16(a: i8x16, b: i8x16) -> i8x16 {
+    vmxb(a, b)
+}
+
+// CHECK-LABEL: define <8 x i16> @max_i16x8
+// CHECK-SAME: <8 x i16> %a, <8 x i16> %b
+// CHECK: call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_i16x8(a: i16x8, b: i16x8) -> i16x8 {
+    vmxh(a, b)
+}
+
+// CHECK-LABEL: define <4 x i32> @max_i32x4
+// CHECK-SAME: <4 x i32> %a, <4 x i32> %b
+// CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_i32x4(a: i32x4, b: i32x4) -> i32x4 {
+    vmxf(a, b)
+}
+
+// CHECK-LABEL: define <2 x i64> @max_i64x2
+// CHECK-SAME: <2 x i64> %a, <2 x i64> %b
+// CHECK: call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_i64x2(a: i64x2, b: i64x2) -> i64x2 {
+    vmxg(a, b)
+}
+
+// CHECK-LABEL: define <4 x float> @choose_f32x4
+// CHECK-SAME: <4 x float> %a, <4 x float> %b
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn choose_f32x4(a: f32x4, b: f32x4, c: bool) -> f32x4 {
+    if c { a } else { b }
+}
+
+// CHECK-LABEL: define <2 x double> @choose_f64x2
+// CHECK-SAME: <2 x double> %a, <2 x double> %b
+#[no_mangle]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn choose_f64x2(a: f64x2, b: f64x2, c: bool) -> f64x2 {
+    if c { a } else { b }
+}
+
+#[repr(C)]
+struct Wrapper<T>(T);
+
+#[no_mangle]
+#[inline(never)]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_wrapper_i8x16(a: Wrapper<i8x16>, b: Wrapper<i8x16>) -> Wrapper<i8x16> {
+    // CHECK-LABEL: max_wrapper_i8x16
+    // CHECK-SAME: sret([16 x i8])
+    // CHECK-SAME: <16 x i8>
+    // CHECK-SAME: <16 x i8>
+    // CHECK: call <16 x i8> @llvm.smax.v16i8
+    // CHECK-SAME: <16 x i8>
+    // CHECK-SAME: <16 x i8>
+    Wrapper(vmxb(a.0, b.0))
+}
+
+#[no_mangle]
+#[inline(never)]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn max_wrapper_i64x2(a: Wrapper<i64x2>, b: Wrapper<i64x2>) -> Wrapper<i64x2> {
+    // CHECK-LABEL: max_wrapper_i64x2
+    // CHECK-SAME: sret([16 x i8])
+    // CHECK-SAME: <16 x i8>
+    // CHECK-SAME: <16 x i8>
+    // CHECK: call <2 x i64> @llvm.smax.v2i64
+    // CHECK-SAME: <2 x i64>
+    // CHECK-SAME: <2 x i64>
+    Wrapper(vmxg(a.0, b.0))
+}
+
+#[no_mangle]
+#[inline(never)]
+#[target_feature(enable = "vector")]
+pub unsafe extern "C" fn choose_wrapper_f64x2(
+    a: Wrapper<f64x2>,
+    b: Wrapper<f64x2>,
+    c: bool,
+) -> Wrapper<f64x2> {
+    // CHECK-LABEL: choose_wrapper_f64x2
+    // CHECK-SAME: sret([16 x i8])
+    // CHECK-SAME: <16 x i8>
+    // CHECK-SAME: <16 x i8>
+    Wrapper(choose_f64x2(a.0, b.0, c))
+}
+
+// CHECK: declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
index 10ceeecf900..605a0d520a7 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-gather.rs
@@ -23,7 +23,9 @@ extern "rust-intrinsic" {
 #[no_mangle]
 pub unsafe fn gather_f32x2(pointers: Vec2<*const f32>, mask: Vec2<i32>,
                            values: Vec2<f32>) -> Vec2<f32> {
-    // CHECK: call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> {{.*}}, <2 x float> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]], <2 x float> {{.*}})
     simd_gather(values, pointers, mask)
 }
 
@@ -31,6 +33,8 @@ pub unsafe fn gather_f32x2(pointers: Vec2<*const f32>, mask: Vec2<i32>,
 #[no_mangle]
 pub unsafe fn gather_pf32x2(pointers: Vec2<*const *const f32>, mask: Vec2<i32>,
                             values: Vec2<*const f32>) -> Vec2<*const f32> {
-    // CHECK: call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> {{.*}}, <2 x ptr> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]], <2 x ptr> {{.*}})
     simd_gather(values, pointers, mask)
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
index 073dc0ac94d..015f6fd9cef 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-load.rs
@@ -21,7 +21,9 @@ extern "rust-intrinsic" {
 #[no_mangle]
 pub unsafe fn load_f32x2(mask: Vec2<i32>, pointer: *const f32,
                          values: Vec2<f32>) -> Vec2<f32> {
-    // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 4, <2 x i1> {{.*}}, <2 x float> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call <2 x float> @llvm.masked.load.v2f32.p0(ptr {{.*}}, i32 4, <2 x i1> [[B]], <2 x float> {{.*}})
     simd_masked_load(mask, pointer, values)
 }
 
@@ -29,6 +31,8 @@ pub unsafe fn load_f32x2(mask: Vec2<i32>, pointer: *const f32,
 #[no_mangle]
 pub unsafe fn load_pf32x4(mask: Vec4<i32>, pointer: *const *const f32,
                           values: Vec4<*const f32>) -> Vec4<*const f32> {
-    // CHECK: call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr {{.*}}, i32 {{.*}}, <4 x i1> {{.*}}, <4 x ptr> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <4 x i32> {{.*}}, <i32 31, i32 31, i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1>
+    // CHECK: call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr {{.*}}, i32 {{.*}}, <4 x i1> [[B]], <4 x ptr> {{.*}})
     simd_masked_load(mask, pointer, values)
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
index 7c3393e6f2e..471a4bea181 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-masked-store.rs
@@ -20,13 +20,17 @@ extern "rust-intrinsic" {
 // CHECK-LABEL: @store_f32x2
 #[no_mangle]
 pub unsafe fn store_f32x2(mask: Vec2<i32>, pointer: *mut f32, values: Vec2<f32>) {
-    // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 4, <2 x i1> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call void @llvm.masked.store.v2f32.p0(<2 x float> {{.*}}, ptr {{.*}}, i32 4, <2 x i1> [[B]])
     simd_masked_store(mask, pointer, values)
 }
 
 // CHECK-LABEL: @store_pf32x4
 #[no_mangle]
 pub unsafe fn store_pf32x4(mask: Vec4<i32>, pointer: *mut *const f32, values: Vec4<*const f32>) {
-    // CHECK: call void @llvm.masked.store.v4p0.p0(<4 x ptr> {{.*}}, ptr {{.*}}, i32 {{.*}}, <4 x i1> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <4 x i32> {{.*}}, <i32 31, i32 31, i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1>
+    // CHECK: call void @llvm.masked.store.v4p0.p0(<4 x ptr> {{.*}}, ptr {{.*}}, i32 {{.*}}, <4 x i1> [[B]])
     simd_masked_store(mask, pointer, values)
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
index 3c75ef5be40..1c42b2534d8 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-scatter.rs
@@ -23,7 +23,9 @@ extern "rust-intrinsic" {
 #[no_mangle]
 pub unsafe fn scatter_f32x2(pointers: Vec2<*mut f32>, mask: Vec2<i32>,
                             values: Vec2<f32>) {
-    // CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
     simd_scatter(values, pointers, mask)
 }
 
@@ -32,6 +34,8 @@ pub unsafe fn scatter_f32x2(pointers: Vec2<*mut f32>, mask: Vec2<i32>,
 #[no_mangle]
 pub unsafe fn scatter_pf32x2(pointers: Vec2<*mut *const f32>, mask: Vec2<i32>,
                              values: Vec2<*const f32>) {
-    // CHECK: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> {{.*}})
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> {{.*}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> {{.*}}, <2 x ptr> {{.*}}, i32 {{.*}}, <2 x i1> [[B]]
     simd_scatter(values, pointers, mask)
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
index c12fefa413b..a73593160f2 100644
--- a/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-generic-select.rs
@@ -3,7 +3,7 @@
 
 #![crate_type = "lib"]
 #![feature(repr_simd, intrinsics)]
-#[allow(non_camel_case_types)]
+#![allow(non_camel_case_types)]
 
 #[repr(simd)]
 #[derive(Copy, Clone, PartialEq, Debug)]
@@ -17,21 +17,37 @@ pub struct f32x8([f32; 8]);
 #[derive(Copy, Clone, PartialEq, Debug)]
 pub struct b8x4(pub [i8; 4]);
 
+#[repr(simd)]
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub struct i32x4([i32; 4]);
+
 extern "rust-intrinsic" {
     fn simd_select<T, U>(x: T, a: U, b: U) -> U;
     fn simd_select_bitmask<T, U>(x: T, a: U, b: U) -> U;
 }
 
-// CHECK-LABEL: @select
+// CHECK-LABEL: @select_m8
+#[no_mangle]
+pub unsafe fn select_m8(m: b8x4, a: f32x4, b: f32x4) -> f32x4 {
+    // CHECK: [[A:%[0-9]+]] = lshr <4 x i8> %{{.*}}, <i8 7, i8 7, i8 7, i8 7>
+    // CHECK: [[B:%[0-9]+]] = trunc <4 x i8> [[A]] to <4 x i1>
+    // CHECK: select <4 x i1> [[B]]
+    simd_select(m, a, b)
+}
+
+// CHECK-LABEL: @select_m32
 #[no_mangle]
-pub unsafe fn select(m: b8x4, a: f32x4, b: f32x4) -> f32x4 {
-    // CHECK: select <4 x i1>
+pub unsafe fn select_m32(m: i32x4, a: f32x4, b: f32x4) -> f32x4 {
+    // CHECK: [[A:%[0-9]+]] = lshr <4 x i32> %{{.*}}, <i32 31, i32 31, i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <4 x i32> [[A]] to <4 x i1>
+    // CHECK: select <4 x i1> [[B]]
     simd_select(m, a, b)
 }
 
 // CHECK-LABEL: @select_bitmask
 #[no_mangle]
 pub unsafe fn select_bitmask(m: i8, a: f32x8, b: f32x8) -> f32x8 {
-    // CHECK: select <8 x i1>
+    // CHECK: [[A:%[0-9]+]] = bitcast i8 {{.*}} to <8 x i1>
+    // CHECK: select <8 x i1> [[A]]
     simd_select_bitmask(m, a, b)
 }
diff --git a/tests/codegen/simd-intrinsic/simd-intrinsic-mask-reduce.rs b/tests/codegen/simd-intrinsic/simd-intrinsic-mask-reduce.rs
new file mode 100644
index 00000000000..4df246c2f5c
--- /dev/null
+++ b/tests/codegen/simd-intrinsic/simd-intrinsic-mask-reduce.rs
@@ -0,0 +1,65 @@
+//@ compile-flags: -C no-prepopulate-passes
+//
+
+#![crate_type = "lib"]
+#![feature(repr_simd, intrinsics)]
+#![allow(non_camel_case_types)]
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+pub struct mask32x2([i32; 2]);
+
+#[repr(simd)]
+#[derive(Copy, Clone)]
+pub struct mask8x16([i8; 16]);
+
+extern "rust-intrinsic" {
+    fn simd_reduce_all<T>(x: T) -> bool;
+    fn simd_reduce_any<T>(x: T) -> bool;
+}
+
+// NOTE(eddyb) `%{{x|1}}` is used because on some targets (e.g. WASM)
+// SIMD vectors are passed directly, resulting in `%x` being a vector,
+// while on others they're passed indirectly, resulting in `%x` being
+// a pointer to a vector, and `%1` a vector loaded from that pointer.
+// This is controlled by the target spec option `simd_types_indirect`.
+
+// CHECK-LABEL: @reduce_any_32x2
+#[no_mangle]
+pub unsafe fn reduce_any_32x2(x: mask32x2) -> bool {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[B]])
+    // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8
+    simd_reduce_any(x)
+}
+
+// CHECK-LABEL: @reduce_all_32x2
+#[no_mangle]
+pub unsafe fn reduce_all_32x2(x: mask32x2) -> bool {
+    // CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, <i32 31, i32 31>
+    // CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1>
+    // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[B]])
+    // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8
+    simd_reduce_all(x)
+}
+
+// CHECK-LABEL: @reduce_any_8x16
+#[no_mangle]
+pub unsafe fn reduce_any_8x16(x: mask8x16) -> bool {
+    // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+    // CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1>
+    // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[B]])
+    // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8
+    simd_reduce_any(x)
+}
+
+// CHECK-LABEL: @reduce_all_8x16
+#[no_mangle]
+pub unsafe fn reduce_all_8x16(x: mask8x16) -> bool {
+    // CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+    // CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1>
+    // CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> [[B]])
+    // CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8
+    simd_reduce_all(x)
+}
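The thread running through the CHECK updates above is the integer lane-mask convention: a lane is selected when the high bit of its mask element is set, which codegen materializes as an `lshr` by (element width - 1) followed by a `trunc` to `<N x i1>`. Below is a minimal scalar sketch of that semantics; it is illustrative only and not part of the patch, and `lane_selected` and `simd_select_model` are made-up names.

// Scalar model of the high-bit lane-mask convention the CHECK lines pin down:
// `lshr <N x iK> %m, (K-1)` followed by `trunc ... to <N x i1>` keeps exactly
// the sign bit of each mask element.
fn lane_selected(mask_elem: i32) -> bool {
    ((mask_elem as u32) >> 31) != 0
}

fn simd_select_model(mask: [i32; 4], a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    let mut out = [0.0_f32; 4];
    for i in 0..4 {
        // Lane i takes `a` when the mask element's sign bit is set, else `b`.
        out[i] = if lane_selected(mask[i]) { a[i] } else { b[i] };
    }
    out
}

fn main() {
    // -1 and i32::MIN have the sign bit set; 0 and 1 do not.
    let picked = simd_select_model([-1, 0, i32::MIN, 1], [1.0; 4], [2.0; 4]);
    assert_eq!(picked, [1.0, 2.0, 1.0, 2.0]);
}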
