about summary refs log tree commit diff
path: root/compiler/rustc_codegen_gcc/src/intrinsic
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_gcc/src/intrinsic')
-rw-r--r-- compiler/rustc_codegen_gcc/src/intrinsic/archs.rs 60
-rw-r--r-- compiler/rustc_codegen_gcc/src/intrinsic/mod.rs 9
-rw-r--r-- compiler/rustc_codegen_gcc/src/intrinsic/simd.rs 81
3 files changed, 149 insertions, 1 deletion
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
index 915ed875e32..d1b2a93243d 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs
@@ -95,8 +95,11 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "cubema" => "__builtin_amdgcn_cubema",
                     "cubesc" => "__builtin_amdgcn_cubesc",
                     "cubetc" => "__builtin_amdgcn_cubetc",
+                    "cvt.f16.bf8" => "__builtin_amdgcn_cvt_f16_bf8",
+                    "cvt.f16.fp8" => "__builtin_amdgcn_cvt_f16_fp8",
                     "cvt.f32.bf8" => "__builtin_amdgcn_cvt_f32_bf8",
                     "cvt.f32.fp8" => "__builtin_amdgcn_cvt_f32_fp8",
+                    "cvt.f32.fp8.e5m3" => "__builtin_amdgcn_cvt_f32_fp8_e5m3",
                     "cvt.off.f32.i4" => "__builtin_amdgcn_cvt_off_f32_i4",
                     "cvt.pk.bf8.f32" => "__builtin_amdgcn_cvt_pk_bf8_f32",
                     "cvt.pk.f16.bf8" => "__builtin_amdgcn_cvt_pk_f16_bf8",
@@ -181,6 +184,12 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "dot4.f32.fp8.bf8" => "__builtin_amdgcn_dot4_f32_fp8_bf8",
                     "dot4.f32.fp8.fp8" => "__builtin_amdgcn_dot4_f32_fp8_fp8",
                     "ds.add.gs.reg.rtn" => "__builtin_amdgcn_ds_add_gs_reg_rtn",
+                    "ds.atomic.async.barrier.arrive.b64" => {
+                        "__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64"
+                    }
+                    "ds.atomic.barrier.arrive.rtn.b64" => {
+                        "__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64"
+                    }
                     "ds.bpermute" => "__builtin_amdgcn_ds_bpermute",
                     "ds.bpermute.fi.b32" => "__builtin_amdgcn_ds_bpermute_fi_b32",
                     "ds.gws.barrier" => "__builtin_amdgcn_ds_gws_barrier",
@@ -198,8 +207,32 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "fdot2.f16.f16" => "__builtin_amdgcn_fdot2_f16_f16",
                     "fdot2.f32.bf16" => "__builtin_amdgcn_fdot2_f32_bf16",
                     "fdot2c.f32.bf16" => "__builtin_amdgcn_fdot2c_f32_bf16",
+                    "flat.prefetch" => "__builtin_amdgcn_flat_prefetch",
                     "fmul.legacy" => "__builtin_amdgcn_fmul_legacy",
+                    "global.load.async.to.lds.b128" => {
+                        "__builtin_amdgcn_global_load_async_to_lds_b128"
+                    }
+                    "global.load.async.to.lds.b32" => {
+                        "__builtin_amdgcn_global_load_async_to_lds_b32"
+                    }
+                    "global.load.async.to.lds.b64" => {
+                        "__builtin_amdgcn_global_load_async_to_lds_b64"
+                    }
+                    "global.load.async.to.lds.b8" => "__builtin_amdgcn_global_load_async_to_lds_b8",
                     "global.load.lds" => "__builtin_amdgcn_global_load_lds",
+                    "global.prefetch" => "__builtin_amdgcn_global_prefetch",
+                    "global.store.async.from.lds.b128" => {
+                        "__builtin_amdgcn_global_store_async_from_lds_b128"
+                    }
+                    "global.store.async.from.lds.b32" => {
+                        "__builtin_amdgcn_global_store_async_from_lds_b32"
+                    }
+                    "global.store.async.from.lds.b64" => {
+                        "__builtin_amdgcn_global_store_async_from_lds_b64"
+                    }
+                    "global.store.async.from.lds.b8" => {
+                        "__builtin_amdgcn_global_store_async_from_lds_b8"
+                    }
                     "groupstaticsize" => "__builtin_amdgcn_groupstaticsize",
                     "iglp.opt" => "__builtin_amdgcn_iglp_opt",
                     "implicit.buffer.ptr" => "__builtin_amdgcn_implicit_buffer_ptr",
@@ -291,6 +324,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "s.incperflevel" => "__builtin_amdgcn_s_incperflevel",
                     "s.memrealtime" => "__builtin_amdgcn_s_memrealtime",
                     "s.memtime" => "__builtin_amdgcn_s_memtime",
+                    "s.monitor.sleep" => "__builtin_amdgcn_s_monitor_sleep",
                     "s.sendmsg" => "__builtin_amdgcn_s_sendmsg",
                     "s.sendmsghalt" => "__builtin_amdgcn_s_sendmsghalt",
                     "s.setprio" => "__builtin_amdgcn_s_setprio",
@@ -300,11 +334,15 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "s.sleep.var" => "__builtin_amdgcn_s_sleep_var",
                     "s.ttracedata" => "__builtin_amdgcn_s_ttracedata",
                     "s.ttracedata.imm" => "__builtin_amdgcn_s_ttracedata_imm",
+                    "s.wait.asynccnt" => "__builtin_amdgcn_s_wait_asynccnt",
                     "s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready",
+                    "s.wait.tensorcnt" => "__builtin_amdgcn_s_wait_tensorcnt",
                     "s.waitcnt" => "__builtin_amdgcn_s_waitcnt",
                     "sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8",
                     "sad.u16" => "__builtin_amdgcn_sad_u16",
                     "sad.u8" => "__builtin_amdgcn_sad_u8",
+                    "sat.pk4.i4.i8" => "__builtin_amdgcn_sat_pk4_i4_i8",
+                    "sat.pk4.u4.u8" => "__builtin_amdgcn_sat_pk4_u4_u8",
                     "sched.barrier" => "__builtin_amdgcn_sched_barrier",
                     "sched.group.barrier" => "__builtin_amdgcn_sched_group_barrier",
                     "sdot2" => "__builtin_amdgcn_sdot2",
@@ -346,8 +384,13 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
                     "smfmac.i32.16x16x64.i8" => "__builtin_amdgcn_smfmac_i32_16x16x64_i8",
                     "smfmac.i32.32x32x32.i8" => "__builtin_amdgcn_smfmac_i32_32x32x32_i8",
                     "smfmac.i32.32x32x64.i8" => "__builtin_amdgcn_smfmac_i32_32x32x64_i8",
+                    "struct.ptr.buffer.load.lds" => "__builtin_amdgcn_struct_ptr_buffer_load_lds",
                     "sudot4" => "__builtin_amdgcn_sudot4",
                     "sudot8" => "__builtin_amdgcn_sudot8",
+                    "tensor.load.to.lds" => "__builtin_amdgcn_tensor_load_to_lds",
+                    "tensor.load.to.lds.d2" => "__builtin_amdgcn_tensor_load_to_lds_d2",
+                    "tensor.store.from.lds" => "__builtin_amdgcn_tensor_store_from_lds",
+                    "tensor.store.from.lds.d2" => "__builtin_amdgcn_tensor_store_from_lds_d2",
                     "udot2" => "__builtin_amdgcn_udot2",
                     "udot4" => "__builtin_amdgcn_udot4",
                     "udot8" => "__builtin_amdgcn_udot8",
@@ -6326,6 +6369,23 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str {
             }
             s390(name, full_name)
         }
+        "spv" => {
+            #[allow(non_snake_case)] // NOTE(review): `spv` is already snake_case; presumably kept for uniformity with the other per-arch helpers — confirm.
+            fn spv(name: &str, full_name: &str) -> &'static str { // Looks up `name` and returns the matching `__builtin_spirv_*` builtin name.
+                match name {
+                    // spv: each arm pairs an intrinsic name with its `__builtin_spirv_*` counterpart.
+                    "num.subgroups" => "__builtin_spirv_num_subgroups",
+                    "subgroup.id" => "__builtin_spirv_subgroup_id",
+                    "subgroup.local.invocation.id" => {
+                        "__builtin_spirv_subgroup_local_invocation_id"
+                    }
+                    "subgroup.max.size" => "__builtin_spirv_subgroup_max_size",
+                    "subgroup.size" => "__builtin_spirv_subgroup_size",
+                    _ => unimplemented!("***** unsupported LLVM intrinsic {full_name}"), // Panics for any other name; `full_name` is only used in this message.
+                }
+            }
+            spv(name, full_name)
+        }
         "ve" => {
             #[allow(non_snake_case)]
             fn ve(name: &str, full_name: &str) -> &'static str {
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index 0753ac1aeb8..eb0a5336a1f 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -925,10 +925,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         // TODO(antoyo): use width?
         let arg_type = arg.get_type();
         let result_type = self.u32_type;
+        let arg = if arg_type.is_signed(self.cx) {
+            let new_type = arg_type.to_unsigned(self.cx);
+            self.gcc_int_cast(arg, new_type)
+        } else {
+            arg
+        };
+        let arg_type = arg.get_type();
         let count_leading_zeroes =
             // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
             // instead of using is_uint().
-            if arg_type.is_uint(self.cx) {
+            if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) {
                 "__builtin_clz"
             }
             else if arg_type.is_ulong(self.cx) {
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
index 350915a277e..fdc15d580ef 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs
@@ -206,6 +206,28 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         );
     }
 
+    #[cfg(feature = "master")]
+    if name == sym::simd_funnel_shl {
+        return Ok(simd_funnel_shift(
+            bx,
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            true,
+        ));
+    }
+
+    #[cfg(feature = "master")]
+    if name == sym::simd_funnel_shr {
+        return Ok(simd_funnel_shift(
+            bx,
+            args[0].immediate(),
+            args[1].immediate(),
+            args[2].immediate(),
+            false,
+        ));
+    }
+
     if name == sym::simd_bswap {
         return Ok(simd_bswap(bx, args[0].immediate()));
     }
@@ -1434,3 +1456,62 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
 
     unimplemented!("simd {}", name);
 }
+
+#[cfg(feature = "master")] // Only compiled when the `master` feature is enabled.
+fn simd_funnel_shift<'a, 'gcc, 'tcx>( // Lane-by-lane funnel shift of two vectors, done on double-width scalar integers.
+    bx: &mut Builder<'a, 'gcc, 'tcx>, // Builder used to create the constants, casts and the result vector.
+    a: RValue<'gcc>, // Vector whose lanes become the upper half of each concatenated value.
+    b: RValue<'gcc>, // Vector whose lanes become the lower half of each concatenated value.
+    shift: RValue<'gcc>, // Vector of per-lane shift amounts.
+    shift_left: bool, // `true`: shift left and keep the upper half; `false`: shift right and keep the lower half.
+) -> RValue<'gcc> { // Returns a new vector of `a`'s type holding one result per lane.
+    use crate::common::SignType; // presumably the trait providing `to_unsigned` used below — confirm.
+
+    let a_type = a.get_type();
+    let vector_type = a_type.unqualified().dyncast_vector().expect("vector type"); // Panics with "vector type" if the dyncast yields nothing.
+    let num_units = vector_type.get_num_units();
+    let elem_type = vector_type.get_element_type();
+
+    let (new_int_type, int_shift_val, int_mask) = if elem_type.is_compatible_with(bx.u8_type) // (double-width unsigned type, element bit width, lower-half mask)
+        || elem_type.is_compatible_with(bx.i8_type)
+    {
+        (bx.u16_type, 8, u8::MAX as u64)
+    } else if elem_type.is_compatible_with(bx.u16_type) || elem_type.is_compatible_with(bx.i16_type)
+    {
+        (bx.u32_type, 16, u16::MAX as u64)
+    } else if elem_type.is_compatible_with(bx.u32_type) || elem_type.is_compatible_with(bx.i32_type)
+    {
+        (bx.u64_type, 32, u32::MAX as u64)
+    } else if elem_type.is_compatible_with(bx.u64_type) || elem_type.is_compatible_with(bx.i64_type)
+    {
+        (bx.u128_type, 64, u64::MAX)
+    } else {
+        unimplemented!("funnel shift on {:?}", elem_type); // Any other element type panics here.
+    };
+
+    let int_mask = bx.context.new_rvalue_from_long(new_int_type, int_mask as i64); // NOTE(review): `u64::MAX as i64` is -1 in the 64-bit case — confirm the resulting u128 constant is what is intended.
+    let int_shift_val = bx.context.new_rvalue_from_int(new_int_type, int_shift_val); // Element bit width as a constant of the wide type.
+    let mut elements = vec![]; // Collects one result value per lane.
+    let unsigned_type = elem_type.to_unsigned(bx); // Target type for the bitcasts of the `a` and `b` lanes below.
+    for i in 0..num_units {
+        let index = bx.context.new_rvalue_from_int(bx.int_type, i as i32); // Lane index as an `int` constant.
+        let a_val = bx.context.new_vector_access(None, a, index).to_rvalue();
+        let a_val = bx.context.new_bitcast(None, a_val, unsigned_type); // presumably so that widening a signed lane does not sign-extend — TODO confirm.
+        // TODO: we probably need to use gcc_int_cast instead. NOTE(review): the casts below already call gcc_int_cast; unclear whether this refers to the bitcast above — confirm or remove.
+        let a_val = bx.gcc_int_cast(a_val, new_int_type);
+        let b_val = bx.context.new_vector_access(None, b, index).to_rvalue();
+        let b_val = bx.context.new_bitcast(None, b_val, unsigned_type);
+        let b_val = bx.gcc_int_cast(b_val, new_int_type);
+        let shift_val = bx.context.new_vector_access(None, shift, index).to_rvalue();
+        let shift_val = bx.gcc_int_cast(shift_val, new_int_type); // NOTE(review): unlike the `a`/`b` lanes, the shift lane is not bitcast to unsigned first — confirm this is intended.
+        let mut val = a_val << int_shift_val | b_val; // `<<` binds tighter than `|`: the `a` lane lands in the upper half, the `b` lane in the lower half.
+        if shift_left {
+            val = (val << shift_val) >> int_shift_val; // Shift left, then shift right by the element width to bring the upper half down.
+        } else {
+            val = (val >> shift_val) & int_mask; // Shift right, then apply the lower-half mask.
+        }
+        let val = bx.gcc_int_cast(val, elem_type); // Convert the wide value back to the vector's element type.
+        elements.push(val);
+    }
+    bx.context.new_rvalue_from_vector(None, a_type, &elements) // Build the result vector, typed like `a`, from the per-lane values.
+}