diff options
| author | Antoni Boucher <bouanto@zoho.com> | 2024-02-20 10:24:06 -0500 |
|---|---|---|
| committer | Antoni Boucher <bouanto@zoho.com> | 2024-02-20 10:24:06 -0500 |
| commit | f6e16e95df4ebe6e954ba4c9021e28205c88703c (patch) | |
| tree | e50c892e1c403ffd6fbec075c09e037b2341d9c2 /src | |
| parent | 6bbbf59951a6ef9f54f5f0e2849bba2ac53928a5 (diff) | |
| parent | 3e02db23af33a0b47d941240843327a6c0539ba5 (diff) | |
| download | rust-f6e16e95df4ebe6e954ba4c9021e28205c88703c.tar.gz rust-f6e16e95df4ebe6e954ba4c9021e28205c88703c.zip | |
Merge branch 'master' into sync_from_rust_2024_02_20
Diffstat (limited to 'src')
| -rw-r--r-- | src/back/write.rs | 3 | ||||
| -rw-r--r-- | src/base.rs | 2 | ||||
| -rw-r--r-- | src/builder.rs | 21 | ||||
| -rw-r--r-- | src/consts.rs | 2 | ||||
| -rw-r--r-- | src/declare.rs | 10 | ||||
| -rw-r--r-- | src/int.rs | 2 | ||||
| -rw-r--r-- | src/intrinsic/archs.rs | 27 | ||||
| -rw-r--r-- | src/intrinsic/llvm.rs | 2 | ||||
| -rw-r--r-- | src/intrinsic/simd.rs | 216 | ||||
| -rw-r--r-- | src/lib.rs | 3 | ||||
| -rw-r--r-- | src/mono_item.rs | 2 |
11 files changed, 253 insertions, 37 deletions
diff --git a/src/back/write.rs b/src/back/write.rs index 2f8a54f529c..eea62adca07 100644 --- a/src/back/write.rs +++ b/src/back/write.rs @@ -70,7 +70,8 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, dcx: &Dia } if config.emit_ir { - unimplemented!(); + let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name); + std::fs::write(out, "").expect("write file"); } if config.emit_asm { diff --git a/src/base.rs b/src/base.rs index b0788718da4..773e234150d 100644 --- a/src/base.rs +++ b/src/base.rs @@ -164,7 +164,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Lock context.add_driver_option("-v"); } - // NOTE: The codegen generates unrechable blocks. + // NOTE: The codegen generates unreachable blocks. context.set_allow_unreachable_blocks(true); { diff --git a/src/builder.rs b/src/builder.rs index 42e61b3ccb5..56d9fd30bf6 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -606,12 +606,29 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // ../../../gcc/gcc/cfgexpand.cc:6069 // 0x7f0101bf9194 execute // ../../../gcc/gcc/cfgexpand.cc:6795 - if a.get_type().is_compatible_with(self.cx.float_type) { + let a_type = a.get_type(); + let a_type_unqualified = a_type.unqualified(); + if a_type.is_compatible_with(self.cx.float_type) { let fmodf = self.context.get_builtin_function("fmodf"); // FIXME(antoyo): this seems to produce the wrong result. return self.context.new_call(None, fmodf, &[a, b]); } - assert_eq!(a.get_type().unqualified(), self.cx.double_type); + else if let Some(vector_type) = a_type_unqualified.dyncast_vector() { + assert_eq!(a_type_unqualified, b.get_type().unqualified()); + + let num_units = vector_type.get_num_units(); + let new_elements: Vec<_> = (0..num_units) + .map(|i| { + let index = self.context.new_rvalue_from_long(self.cx.type_u32(), i as _); + let x = self.extract_element(a, index).to_rvalue(); + let y = self.extract_element(b, index).to_rvalue(); + self.frem(x, y) + }) + .collect(); + + return self.context.new_rvalue_from_vector(None, a_type, &new_elements) + } + assert_eq!(a_type_unqualified, self.cx.double_type); let fmod = self.context.get_builtin_function("fmod"); return self.context.new_call(None, fmod, &[a, b]); diff --git a/src/consts.rs b/src/consts.rs index 70d8db02247..054741e1642 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -235,7 +235,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { if !self.tcx.is_reachable_non_generic(def_id) { #[cfg(feature = "master")] - global.add_attribute(VarAttribute::Visibility(Visibility::Hidden)); + global.add_string_attribute(VarAttribute::Visibility(Visibility::Hidden)); } global diff --git a/src/declare.rs b/src/declare.rs index 247454fa58e..72cba9fbba9 100644 --- a/src/declare.rs +++ b/src/declare.rs @@ -125,7 +125,9 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll let params: Vec<_> = param_types.into_iter().enumerate() .map(|(index, param)| cx.context.new_parameter(None, *param, &format!("param{}", index))) // TODO(antoyo): set name. .collect(); - let func = cx.context.new_function(None, cx.linkage.get(), return_type, ¶ms, mangle_name(name), variadic); + #[cfg(not(feature="master"))] + let name = mangle_name(name); + let func = cx.context.new_function(None, cx.linkage.get(), return_type, ¶ms, &name, variadic); cx.functions.borrow_mut().insert(name.to_string(), func); #[cfg(feature="master")] @@ -179,8 +181,10 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll } // FIXME(antoyo): this is a hack because libgccjit currently only supports alpha, num and _. -// Unsupported characters: `$` and `.`. -pub fn mangle_name(name: &str) -> String { +// Unsupported characters: `$`, `.` and `*`. +// FIXME(antoyo): `*` might not be expected: https://github.com/rust-lang/rust/issues/116979#issuecomment-1840926865 +#[cfg(not(feature="master"))] +fn mangle_name(name: &str) -> String { name.replace(|char: char| { if !char.is_alphanumeric() && char != '_' { debug_assert!("$.*".contains(char), "Unsupported char in function name {}: {}", name, char); diff --git a/src/int.rs b/src/int.rs index 9b9b3ea4f87..b69e073c4d9 100644 --- a/src/int.rs +++ b/src/int.rs @@ -48,7 +48,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { pub fn gcc_neg(&self, a: RValue<'gcc>) -> RValue<'gcc> { let a_type = a.get_type(); - if self.is_native_int_type(a_type) { + if self.is_native_int_type(a_type) || a_type.is_vector() { self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a) } else { diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index 15d67385c3e..c4ae1751fa0 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -151,8 +151,10 @@ match name { "llvm.amdgcn.msad.u8" => "__builtin_amdgcn_msad_u8", "llvm.amdgcn.perm" => "__builtin_amdgcn_perm", "llvm.amdgcn.permlane16" => "__builtin_amdgcn_permlane16", + "llvm.amdgcn.permlane16.var" => "__builtin_amdgcn_permlane16_var", "llvm.amdgcn.permlane64" => "__builtin_amdgcn_permlane64", "llvm.amdgcn.permlanex16" => "__builtin_amdgcn_permlanex16", + "llvm.amdgcn.permlanex16.var" => "__builtin_amdgcn_permlanex16_var", "llvm.amdgcn.qsad.pk.u16.u8" => "__builtin_amdgcn_qsad_pk_u16_u8", "llvm.amdgcn.queue.ptr" => "__builtin_amdgcn_queue_ptr", "llvm.amdgcn.rcp.legacy" => "__builtin_amdgcn_rcp_legacy", @@ -160,11 +162,20 @@ match name { "llvm.amdgcn.readlane" => "__builtin_amdgcn_readlane", "llvm.amdgcn.rsq.legacy" => "__builtin_amdgcn_rsq_legacy", "llvm.amdgcn.s.barrier" => "__builtin_amdgcn_s_barrier", + "llvm.amdgcn.s.barrier.init" => "__builtin_amdgcn_s_barrier_init", + "llvm.amdgcn.s.barrier.join" => "__builtin_amdgcn_s_barrier_join", + "llvm.amdgcn.s.barrier.leave" => "__builtin_amdgcn_s_barrier_leave", + "llvm.amdgcn.s.barrier.signal" => "__builtin_amdgcn_s_barrier_signal", + "llvm.amdgcn.s.barrier.signal.isfirst" => "__builtin_amdgcn_s_barrier_signal_isfirst", + "llvm.amdgcn.s.barrier.signal.isfirst.var" => "__builtin_amdgcn_s_barrier_signal_isfirst_var", + "llvm.amdgcn.s.barrier.signal.var" => "__builtin_amdgcn_s_barrier_signal_var", + "llvm.amdgcn.s.barrier.wait" => "__builtin_amdgcn_s_barrier_wait", "llvm.amdgcn.s.dcache.inv" => "__builtin_amdgcn_s_dcache_inv", "llvm.amdgcn.s.dcache.inv.vol" => "__builtin_amdgcn_s_dcache_inv_vol", "llvm.amdgcn.s.dcache.wb" => "__builtin_amdgcn_s_dcache_wb", "llvm.amdgcn.s.dcache.wb.vol" => "__builtin_amdgcn_s_dcache_wb_vol", "llvm.amdgcn.s.decperflevel" => "__builtin_amdgcn_s_decperflevel", + "llvm.amdgcn.s.get.barrier.state" => "__builtin_amdgcn_s_get_barrier_state", "llvm.amdgcn.s.get.waveid.in.workgroup" => "__builtin_amdgcn_s_get_waveid_in_workgroup", "llvm.amdgcn.s.getpc" => "__builtin_amdgcn_s_getpc", "llvm.amdgcn.s.getreg" => "__builtin_amdgcn_s_getreg", @@ -176,8 +187,10 @@ match name { "llvm.amdgcn.s.setprio" => "__builtin_amdgcn_s_setprio", "llvm.amdgcn.s.setreg" => "__builtin_amdgcn_s_setreg", "llvm.amdgcn.s.sleep" => "__builtin_amdgcn_s_sleep", + "llvm.amdgcn.s.sleep.var" => "__builtin_amdgcn_s_sleep_var", "llvm.amdgcn.s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready", "llvm.amdgcn.s.waitcnt" => "__builtin_amdgcn_s_waitcnt", + "llvm.amdgcn.s.wakeup.barrier" => "__builtin_amdgcn_s_wakeup_barrier", "llvm.amdgcn.sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8", "llvm.amdgcn.sad.u16" => "__builtin_amdgcn_sad_u16", "llvm.amdgcn.sad.u8" => "__builtin_amdgcn_sad_u8", @@ -314,6 +327,8 @@ match name { // bpf "llvm.bpf.btf.type.id" => "__builtin_bpf_btf_type_id", "llvm.bpf.compare" => "__builtin_bpf_compare", + "llvm.bpf.getelementptr.and.load" => "__builtin_bpf_getelementptr_and_load", + "llvm.bpf.getelementptr.and.store" => "__builtin_bpf_getelementptr_and_store", "llvm.bpf.load.byte" => "__builtin_bpf_load_byte", "llvm.bpf.load.half" => "__builtin_bpf_load_half", "llvm.bpf.load.word" => "__builtin_bpf_load_word", @@ -5776,14 +5791,6 @@ match name { "llvm.s390.verimf" => "__builtin_s390_verimf", "llvm.s390.verimg" => "__builtin_s390_verimg", "llvm.s390.verimh" => "__builtin_s390_verimh", - "llvm.s390.verllb" => "__builtin_s390_verllb", - "llvm.s390.verllf" => "__builtin_s390_verllf", - "llvm.s390.verllg" => "__builtin_s390_verllg", - "llvm.s390.verllh" => "__builtin_s390_verllh", - "llvm.s390.verllvb" => "__builtin_s390_verllvb", - "llvm.s390.verllvf" => "__builtin_s390_verllvf", - "llvm.s390.verllvg" => "__builtin_s390_verllvg", - "llvm.s390.verllvh" => "__builtin_s390_verllvh", "llvm.s390.vfaeb" => "__builtin_s390_vfaeb", "llvm.s390.vfaef" => "__builtin_s390_vfaef", "llvm.s390.vfaeh" => "__builtin_s390_vfaeh", @@ -5815,7 +5822,7 @@ match name { "llvm.s390.vistrh" => "__builtin_s390_vistrh", "llvm.s390.vlbb" => "__builtin_s390_vlbb", "llvm.s390.vll" => "__builtin_s390_vll", - "llvm.s390.vlrl" => "__builtin_s390_vlrl", + "llvm.s390.vlrl" => "__builtin_s390_vlrlr", "llvm.s390.vmaeb" => "__builtin_s390_vmaeb", "llvm.s390.vmaef" => "__builtin_s390_vmaef", "llvm.s390.vmaeh" => "__builtin_s390_vmaeh", @@ -5885,7 +5892,7 @@ match name { "llvm.s390.vstrczb" => "__builtin_s390_vstrczb", "llvm.s390.vstrczf" => "__builtin_s390_vstrczf", "llvm.s390.vstrczh" => "__builtin_s390_vstrczh", - "llvm.s390.vstrl" => "__builtin_s390_vstrl", + "llvm.s390.vstrl" => "__builtin_s390_vstrlr", "llvm.s390.vsumb" => "__builtin_s390_vsumb", "llvm.s390.vsumgf" => "__builtin_s390_vsumgf", "llvm.s390.vsumgh" => "__builtin_s390_vsumgh", diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 35eb4a11005..0d2ce20c654 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -262,7 +262,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc }, // NOTE: the LLVM intrinsic receives 3 floats, but the GCC builtin requires 3 vectors. // FIXME: the intrinsics like _mm_mask_fmadd_sd should probably directly call the GCC - // instrinsic to avoid this. + // intrinsic to avoid this. "__builtin_ia32_vfmaddss3_round" => { let new_args = args.to_vec(); let arg1_type = gcc_func.get_param_type(0); diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index 9fa978cd2ef..cecc982bb1f 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -1,3 +1,5 @@ +use std::iter::FromIterator; + use gccjit::ToRValue; use gccjit::{BinaryOp, RValue, Type}; #[cfg(feature = "master")] @@ -21,6 +23,8 @@ use rustc_target::abi::Align; use crate::builder::Builder; #[cfg(feature = "master")] use crate::context::CodegenCx; +#[cfg(not(feature = "master"))] +use crate::common::SignType; pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( bx: &mut Builder<'a, 'gcc, 'tcx>, @@ -156,6 +160,195 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op)); } + let simd_bswap = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> { + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + if elem_size_bytes == 1 { + return vector; + } + + let type_size_bytes = elem_size_bytes as u64 * in_len; + let shuffle_indices = Vec::from_iter(0..type_size_bytes); + let byte_vector_type = bx.context.new_vector_type(bx.type_u8(), type_size_bytes); + let byte_vector = bx.context.new_bitcast(None, args[0].immediate(), byte_vector_type); + + #[cfg(not(feature = "master"))] + let shuffled = { + let new_elements: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| { + let index = bx.context.new_rvalue_from_long(bx.u64_type, i as _); + bx.extract_element(byte_vector, index) + }) + .collect(); + + bx.context.new_rvalue_from_vector(None, byte_vector_type, &new_elements) + }; + #[cfg(feature = "master")] + let shuffled = { + let indices: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _) + .flat_map(|x| x.iter().rev()) + .map(|&i| bx.context.new_rvalue_from_int(bx.u8_type, i as _)) + .collect(); + + let mask = bx.context.new_rvalue_from_vector(None, byte_vector_type, &indices); + bx.context.new_rvalue_vector_perm(None, byte_vector, byte_vector, mask) + }; + bx.context.new_bitcast(None, shuffled, v_type) + }; + + if name == sym::simd_bswap || name == sym::simd_bitreverse { + require!( + bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer, + InvalidMonomorphization::UnsupportedOperation { + span, + name, + in_ty, + in_elem, + } + ); + } + + if name == sym::simd_bswap { + return Ok(simd_bswap(bx, args[0].immediate())); + } + + // We use a different algorithm from non-vector bitreverse to take advantage of most + // processors' vector shuffle units. It works like this: + // 1. Generate pre-reversed low and high nibbles as a vector. + // 2. Byte-swap the input. + // 3. Mask off the low and high nibbles of each byte in the byte-swapped input. + // 4. Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. + // 5. Combine the results of the shuffle back together and cast back to the original type. + #[cfg(feature = "master")] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let v_type = vector.get_type(); + let vector_type = v_type.unqualified().dyncast_vector().expect("vector type"); + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + + let type_size_bytes = elem_size_bytes as u64 * in_len; + // We need to ensure at least 16 entries in our vector type, since the pre-reversed vectors + // we generate below have 16 entries in them. `new_rvalue_vector_perm` requires the mask + // vector to be of the same length as the source vectors. + let byte_vector_type_size = type_size_bytes.max(16); + + let byte_vector_type = bx.context.new_vector_type(bx.u8_type, type_size_bytes); + let long_byte_vector_type = bx.context.new_vector_type(bx.u8_type, byte_vector_type_size); + + // Step 1: Generate pre-reversed low and high nibbles as a vector. + let zero_byte = bx.context.new_rvalue_zero(bx.u8_type); + let hi_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, x.reverse_bits() as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let hi_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &hi_nibble_elements); + + let lo_nibble_elements: Vec<_> = (0u8..16) + .map(|x| bx.context.new_rvalue_from_int(bx.u8_type, (x.reverse_bits() >> 4) as _)) + .chain((16..byte_vector_type_size).map(|_| zero_byte)) + .collect(); + let lo_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &lo_nibble_elements); + + let mask = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 0x0f); byte_vector_type_size as _]); + + let four_vec = bx.context.new_rvalue_from_vector( + None, + long_byte_vector_type, + &vec![bx.context.new_rvalue_from_int(bx.u8_type, 4); byte_vector_type_size as _]); + + // Step 2: Byte-swap the input. + let swapped = simd_bswap(bx, args[0].immediate()); + let byte_vector = bx.context.new_bitcast(None, swapped, byte_vector_type); + + // We're going to need to extend the vector with zeros to make sure that the types are the + // same, since that's what new_rvalue_vector_perm expects. + let byte_vector = if byte_vector_type_size > type_size_bytes { + let mut byte_vector_elements = Vec::with_capacity(byte_vector_type_size as _); + for i in 0..type_size_bytes { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + let val = bx.extract_element(byte_vector, idx); + byte_vector_elements.push(val); + } + for _ in type_size_bytes..byte_vector_type_size { + byte_vector_elements.push(zero_byte); + } + bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &byte_vector_elements) + } else { + bx.context.new_bitcast(None, byte_vector, long_byte_vector_type) + }; + + // Step 3: Mask off the low and high nibbles of each byte in the byte-swapped input. + let masked_hi = (byte_vector >> four_vec) & mask; + let masked_lo = byte_vector & mask; + + // Step 4: Shuffle the pre-reversed low and high-nibbles using the masked nibbles as a shuffle mask. + let hi = bx.context.new_rvalue_vector_perm(None, hi_nibble, hi_nibble, masked_lo); + let lo = bx.context.new_rvalue_vector_perm(None, lo_nibble, lo_nibble, masked_hi); + + // Step 5: Combine the results of the shuffle back together and cast back to the original type. + let result = hi | lo; + let cast_ty = bx.context.new_vector_type(elem_type, byte_vector_type_size / (elem_size_bytes as u64)); + + // we might need to truncate if sizeof(v_type) < sizeof(cast_type) + if type_size_bytes < byte_vector_type_size { + let cast_result = bx.context.new_bitcast(None, result, cast_ty); + let elems: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _); + bx.extract_element(cast_result, idx) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems)) + } else { + // avoid the unnecessary truncation as an optimization. + return Ok(bx.context.new_bitcast(None, result, v_type)); + } + } + // since gcc doesn't have vector shuffle methods available in non-patched builds, fallback to + // component-wise bitreverses if they're not available. + #[cfg(not(feature = "master"))] + if name == sym::simd_bitreverse { + let vector = args[0].immediate(); + let vector_ty = vector.get_type(); + let vector_type = vector_ty.unqualified().dyncast_vector().expect("vector type"); + let num_elements = vector_type.get_num_units(); + + let elem_type = vector_type.get_element_type(); + let elem_size_bytes = elem_type.get_size(); + let num_type = elem_type.to_unsigned(bx.cx); + let new_elements: Vec<_> = (0..num_elements) + .map(|idx| { + let index = bx.context.new_rvalue_from_long(num_type, idx as _); + let extracted_value = bx.extract_element(vector, index).to_rvalue(); + bx.bit_reverse(elem_size_bytes as u64 * 8, extracted_value) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector_ty, &new_elements)); + } + + if name == sym::simd_ctlz || name == sym::simd_cttz { + let vector = args[0].immediate(); + let elements: Vec<_> = (0..in_len) + .map(|i| { + let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); + let value = bx.extract_element(vector, index).to_rvalue(); + if name == sym::simd_ctlz { + bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value) + } else { + bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value) + } + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements)); + } + if name == sym::simd_shuffle { // Make sure this is actually an array, since typeck only checks the length-suffixed // version of this intrinsic. @@ -504,20 +697,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( default: RValue<'gcc>, pointers: RValue<'gcc>, mask: RValue<'gcc>, - pointer_count: usize, bx: &mut Builder<'a, 'gcc, 'tcx>, in_len: u64, - underlying_ty: Ty<'tcx>, invert: bool, ) -> RValue<'gcc> { - let vector_type = if pointer_count > 1 { - bx.context.new_vector_type(bx.usize_type, in_len) - } else { - vector_ty(bx, underlying_ty, in_len) - }; - let elem_type = vector_type.dyncast_vector().expect("vector type").get_element_type(); + let vector_type = default.get_type(); + let elem_type = vector_type.unqualified().dyncast_vector().expect("vector type").get_element_type(); - let mut values = vec![]; + let mut values = Vec::with_capacity(in_len as usize); for i in 0..in_len { let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); let int = bx.context.new_vector_access(None, pointers, index).to_rvalue(); @@ -530,13 +717,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let vector = bx.context.new_rvalue_from_vector(None, vector_type, &values); - let mut mask_types = vec![]; - let mut mask_values = vec![]; + let mut mask_types = Vec::with_capacity(in_len as usize); + let mut mask_values = Vec::with_capacity(in_len as usize); for i in 0..in_len { let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); mask_types.push(bx.context.new_field(None, bx.i32_type, "m")); let mask_value = bx.context.new_vector_access(None, mask, index).to_rvalue(); - let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value; + let mask_value_cast = bx.context.new_cast(None, mask_value, bx.i32_type); + let masked = bx.context.new_rvalue_from_int(bx.i32_type, in_len as i32) & mask_value_cast; let value = index + masked; mask_values.push(value); } @@ -665,10 +853,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( args[0].immediate(), args[1].immediate(), args[2].immediate(), - pointer_count, bx, in_len, - underlying_ty, false, )); } @@ -783,10 +969,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( args[0].immediate(), args[1].immediate(), args[2].immediate(), - pointer_count, bx, in_len, - underlying_ty, true, ); diff --git a/src/lib.rs b/src/lib.rs index 09ce059476e..7f0696740b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html). * For Thin LTO, this might be helpful: * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none. + * Or the new incremental LTO? * * Maybe some missing optizations enabled by rustc's LTO is in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html * Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans. @@ -27,6 +28,7 @@ #![recursion_limit="256"] #![warn(rust_2018_idioms)] #![warn(unused_lifetimes)] +#![deny(clippy::pattern_type_mismatch)] extern crate rustc_apfloat; extern crate rustc_ast; @@ -247,6 +249,7 @@ fn new_context<'gcc, 'tcx>(tcx: TyCtxt<'tcx>) -> Context<'gcc> { } #[cfg(feature="master")] { + context.set_allow_special_chars_in_func_names(true); let version = Version::get(); let version = format!("{}.{}.{}", version.major, version.minor, version.patch); context.set_output_ident(&format!("rustc version {} with libgccjit {}", diff --git a/src/mono_item.rs b/src/mono_item.rs index 3322d56513b..fdeb2f96fe2 100644 --- a/src/mono_item.rs +++ b/src/mono_item.rs @@ -23,7 +23,7 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> { let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); let global = self.define_global(symbol_name, gcc_type, is_tls, attrs.link_section); #[cfg(feature="master")] - global.add_attribute(VarAttribute::Visibility(base::visibility_to_gcc(visibility))); + global.add_string_attribute(VarAttribute::Visibility(base::visibility_to_gcc(visibility))); // TODO(antoyo): set linkage. self.instances.borrow_mut().insert(instance, global); |
