Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/rustc/Cargo.toml | 1
-rw-r--r-- | compiler/rustc_builtin_macros/Cargo.toml | 5
-rw-r--r-- | compiler/rustc_builtin_macros/src/autodiff.rs | 3
-rw-r--r-- | compiler/rustc_codegen_gcc/src/intrinsic/mod.rs | 9
-rw-r--r-- | compiler/rustc_codegen_llvm/Cargo.toml | 1
-rw-r--r-- | compiler/rustc_codegen_llvm/src/abi.rs | 11
-rw-r--r-- | compiler/rustc_codegen_llvm/src/back/lto.rs | 2
-rw-r--r-- | compiler/rustc_codegen_llvm/src/back/write.rs | 6
-rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs | 8
-rw-r--r-- | compiler/rustc_const_eval/src/interpret/memory.rs | 4
-rw-r--r-- | compiler/rustc_data_structures/src/lib.rs | 1
-rw-r--r-- | compiler/rustc_data_structures/src/sorted_map.rs | 35
-rw-r--r-- | compiler/rustc_data_structures/src/sorted_map/tests.rs | 10
-rw-r--r-- | compiler/rustc_driver_impl/Cargo.toml | 1
-rw-r--r-- | compiler/rustc_interface/Cargo.toml | 1
-rw-r--r-- | compiler/rustc_middle/src/mir/interpret/allocation.rs | 9
-rw-r--r-- | compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs | 111
17 files changed, 113 insertions, 105 deletions
diff --git a/compiler/rustc/Cargo.toml b/compiler/rustc/Cargo.toml
index e3214d1ab9c..9ef8fa75062 100644
--- a/compiler/rustc/Cargo.toml
+++ b/compiler/rustc/Cargo.toml
@@ -30,6 +30,7 @@ features = ['unprefixed_malloc_on_supported_platforms']
 check_only = ['rustc_driver_impl/check_only']
 jemalloc = ['dep:tikv-jemalloc-sys']
 llvm = ['rustc_driver_impl/llvm']
+llvm_enzyme = ['rustc_driver_impl/llvm_enzyme']
 max_level_info = ['rustc_driver_impl/max_level_info']
 rustc_randomized_layouts = ['rustc_driver_impl/rustc_randomized_layouts']
 # tidy-alphabetical-end
diff --git a/compiler/rustc_builtin_macros/Cargo.toml b/compiler/rustc_builtin_macros/Cargo.toml
index e56b9e641a1..ce9a3ce3f24 100644
--- a/compiler/rustc_builtin_macros/Cargo.toml
+++ b/compiler/rustc_builtin_macros/Cargo.toml
@@ -33,3 +33,8 @@ smallvec = { version = "1.8.1", features = ["union", "may_dangle"] }
 thin-vec = "0.2.12"
 tracing = "0.1"
 # tidy-alphabetical-end
+
+[features]
+# tidy-alphabetical-start
+llvm_enzyme = []
+# tidy-alphabetical-end
diff --git a/compiler/rustc_builtin_macros/src/autodiff.rs b/compiler/rustc_builtin_macros/src/autodiff.rs
index 48d0795af5e..f4a923797e2 100644
--- a/compiler/rustc_builtin_macros/src/autodiff.rs
+++ b/compiler/rustc_builtin_macros/src/autodiff.rs
@@ -209,7 +209,8 @@ mod llvm_enzyme {
         mut item: Annotatable,
         mode: DiffMode,
     ) -> Vec<Annotatable> {
-        if cfg!(not(llvm_enzyme)) {
+        // FIXME(bjorn3) maybe have the backend directly tell if autodiff is supported?
+        if cfg!(not(feature = "llvm_enzyme")) {
             ecx.sess.dcx().emit_err(errors::AutoDiffSupportNotBuild { span: meta_item.span });
             return vec![item];
         }
diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
index eb0a5336a1f..84fa56cf903 100644
--- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
+++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs
@@ -730,7 +730,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
         if self.is_sized_indirect() {
             OperandValue::Ref(PlaceValue::new_sized(val, self.layout.align.abi)).store(bx, dst)
         } else if self.is_unsized_indirect() {
-            bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
+            bug!("unsized `ArgAbi` cannot be stored");
         } else if let PassMode::Cast { ref cast, .. } = self.mode {
             // FIXME(eddyb): Figure out when the simpler Store is safe, clang
             // uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
@@ -797,12 +797,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
                 OperandValue::Pair(next(), next()).store(bx, dst);
             }
             PassMode::Indirect { meta_attrs: Some(_), .. } => {
-                let place_val = PlaceValue {
-                    llval: next(),
-                    llextra: Some(next()),
-                    align: self.layout.align.abi,
-                };
-                OperandValue::Ref(place_val).store(bx, dst);
+                bug!("unsized `ArgAbi` cannot be stored");
             }
             PassMode::Direct(_)
             | PassMode::Indirect { meta_attrs: None, .. }
diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml
index 2d11628250c..67bd1e59bb0 100644
--- a/compiler/rustc_codegen_llvm/Cargo.toml
+++ b/compiler/rustc_codegen_llvm/Cargo.toml
@@ -46,5 +46,6 @@ tracing = "0.1"
 
 [features]
 # tidy-alphabetical-start
 check_only = ["rustc_llvm/check_only"]
+llvm_enzyme = []
 # tidy-alphabetical-end
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
index ac7583f5666..11be7041167 100644
--- a/compiler/rustc_codegen_llvm/src/abi.rs
+++ b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -215,9 +215,9 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
                 let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
                 OperandValue::Ref(PlaceValue::new_sized(val, align)).store(bx, dst);
             }
-            // Unsized indirect qrguments
+            // Unsized indirect arguments cannot be stored
             PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
-                bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
+                bug!("unsized `ArgAbi` cannot be stored");
             }
             PassMode::Cast { cast, pad_i32: _ } => {
                 // The ABI mandates that the value is passed as a different struct representation.
@@ -272,12 +272,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
                 OperandValue::Pair(next(), next()).store(bx, dst);
             }
             PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
-                let place_val = PlaceValue {
-                    llval: next(),
-                    llextra: Some(next()),
-                    align: self.layout.align.abi,
-                };
-                OperandValue::Ref(place_val).store(bx, dst);
+                bug!("unsized `ArgAbi` cannot be stored");
             }
             PassMode::Direct(_)
             | PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: _ }
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index f571716d9dd..78107d95e5a 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -617,7 +617,7 @@ pub(crate) fn run_pass_manager(
         crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx);
     }
 
-    if cfg!(llvm_enzyme) && enable_ad && !thin {
+    if cfg!(feature = "llvm_enzyme") && enable_ad && !thin {
         let opt_stage = llvm::OptStage::FatLTO;
         let stage = write::AutodiffStage::PostAD;
         if !config.autodiff.contains(&config::AutoDiff::NoPostopt) {
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 423f0da4878..bda81fbd19e 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -574,7 +574,8 @@ pub(crate) unsafe fn llvm_optimize(
 
     // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
     // differentiated.
-    let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+    let consider_ad =
+        cfg!(feature = "llvm_enzyme") && config.autodiff.contains(&config::AutoDiff::Enable);
     let run_enzyme = autodiff_stage == AutodiffStage::DuringAD;
     let print_before_enzyme = config.autodiff.contains(&config::AutoDiff::PrintModBefore);
     let print_after_enzyme = config.autodiff.contains(&config::AutoDiff::PrintModAfter);
@@ -740,7 +741,8 @@ pub(crate) fn optimize(
 
     // If we know that we will later run AD, then we disable vectorization and loop unrolling.
    // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
-    let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+    let consider_ad =
+        cfg!(feature = "llvm_enzyme") && config.autodiff.contains(&config::AutoDiff::Enable);
     let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
     // The embedded bitcode is used to run LTO/ThinLTO.
     // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 56d756e52cc..695435eb6da 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -59,10 +59,10 @@ pub(crate) enum LLVMRustVerifierFailureAction {
     LLVMReturnStatusAction = 2,
 }
 
-#[cfg(llvm_enzyme)]
+#[cfg(feature = "llvm_enzyme")]
 pub(crate) use self::Enzyme_AD::*;
 
-#[cfg(llvm_enzyme)]
+#[cfg(feature = "llvm_enzyme")]
 pub(crate) mod Enzyme_AD {
     use std::ffi::{CString, c_char};
 
@@ -134,10 +134,10 @@ pub(crate) mod Enzyme_AD {
     }
 }
 
-#[cfg(not(llvm_enzyme))]
+#[cfg(not(feature = "llvm_enzyme"))]
 pub(crate) use self::Fallback_AD::*;
 
-#[cfg(not(llvm_enzyme))]
+#[cfg(not(feature = "llvm_enzyme"))]
 pub(crate) mod Fallback_AD {
     #![allow(unused_variables)]
 
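The hunks above replace the `llvm_enzyme` cfg flag (previously injected by the build system as a custom `--cfg`) with an ordinary Cargo feature, which the `[features]` tables in this patch forward from crate to crate. A minimal, self-contained sketch of that pattern (not part of the commit; the module and function names are invented for illustration, and it assumes a crate whose Cargo.toml declares `llvm_enzyme = []` under `[features]`):

    // Items can be compiled in or out entirely based on the feature...
    #[cfg(feature = "llvm_enzyme")]
    mod enzyme {
        pub fn supported() -> bool {
            true
        }
    }

    #[cfg(not(feature = "llvm_enzyme"))]
    mod enzyme {
        pub fn supported() -> bool {
            false
        }
    }

    fn main() {
        // ...and `cfg!` turns the same test into a plain `bool`, which is the
        // form used by the patched checks in autodiff.rs, lto.rs, and write.rs.
        if cfg!(feature = "llvm_enzyme") {
            println!("built with Enzyme autodiff support");
        }
        println!("autodiff supported: {}", enzyme::supported());
    }
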
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index ebcdb9461d0..323e1cefd58 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -1504,7 +1504,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         // This will also error if copying partial provenance is not supported.
         let provenance = src_alloc
             .provenance()
-            .prepare_copy(src_range, dest_offset, num_copies, self)
+            .prepare_copy(src_range, self)
             .map_err(|e| e.to_interp_error(src_alloc_id))?;
         // Prepare a copy of the initialization mask.
         let init = src_alloc.init_mask().prepare_copy(src_range);
@@ -1590,7 +1590,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             num_copies,
         );
         // copy the provenance to the destination
-        dest_alloc.provenance_apply_copy(provenance);
+        dest_alloc.provenance_apply_copy(provenance, alloc_range(dest_offset, size), num_copies);
 
         interp_ok(())
     }
diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs
index 17da3ea83c8..e4e86bcc41a 100644
--- a/compiler/rustc_data_structures/src/lib.rs
+++ b/compiler/rustc_data_structures/src/lib.rs
@@ -34,6 +34,7 @@
 #![feature(sized_hierarchy)]
 #![feature(test)]
 #![feature(thread_id_value)]
+#![feature(trusted_len)]
 #![feature(type_alias_impl_trait)]
 #![feature(unwrap_infallible)]
 // tidy-alphabetical-end
diff --git a/compiler/rustc_data_structures/src/sorted_map.rs b/compiler/rustc_data_structures/src/sorted_map.rs
index c002d47815b..15e3e6ea4c3 100644
--- a/compiler/rustc_data_structures/src/sorted_map.rs
+++ b/compiler/rustc_data_structures/src/sorted_map.rs
@@ -1,6 +1,7 @@
 use std::borrow::Borrow;
 use std::cmp::Ordering;
 use std::fmt::Debug;
+use std::iter::TrustedLen;
 use std::mem;
 use std::ops::{Bound, Index, IndexMut, RangeBounds};
 
@@ -215,36 +216,40 @@ impl<K: Ord, V> SortedMap<K, V> {
     /// It is up to the caller to make sure that the elements are sorted by key
     /// and that there are no duplicates.
     #[inline]
-    pub fn insert_presorted(&mut self, elements: Vec<(K, V)>) {
-        if elements.is_empty() {
+    pub fn insert_presorted(
+        &mut self,
+        // We require `TrustedLen` to ensure that the `splice` below is actually efficient.
+        mut elements: impl Iterator<Item = (K, V)> + DoubleEndedIterator + TrustedLen,
+    ) {
+        let Some(first) = elements.next() else {
             return;
-        }
-
-        debug_assert!(elements.array_windows().all(|[fst, snd]| fst.0 < snd.0));
+        };
 
-        let start_index = self.lookup_index_for(&elements[0].0);
+        let start_index = self.lookup_index_for(&first.0);
 
         let elements = match start_index {
             Ok(index) => {
-                let mut elements = elements.into_iter();
-                self.data[index] = elements.next().unwrap();
-                elements
+                self.data[index] = first; // overwrite first element
+                elements.chain(None) // insert the rest below
             }
             Err(index) => {
-                if index == self.data.len() || elements.last().unwrap().0 < self.data[index].0 {
+                let last = elements.next_back();
+                if index == self.data.len()
+                    || last.as_ref().is_none_or(|l| l.0 < self.data[index].0)
+                {
                     // We can copy the whole range without having to mix with
                     // existing elements.
-                    self.data.splice(index..index, elements);
+                    self.data
+                        .splice(index..index, std::iter::once(first).chain(elements).chain(last));
                     return;
                 }
 
-                let mut elements = elements.into_iter();
-                self.data.insert(index, elements.next().unwrap());
-                elements
+                self.data.insert(index, first);
+                elements.chain(last) // insert the rest below
             }
         };
 
-        // Insert the rest
+        // Insert the rest. This is super inefficicent since each insertion copies the entire tail.
         for (k, v) in elements {
             self.insert(k, v);
         }
diff --git a/compiler/rustc_data_structures/src/sorted_map/tests.rs b/compiler/rustc_data_structures/src/sorted_map/tests.rs
index ea4d2f1feac..17d0d3cb170 100644
--- a/compiler/rustc_data_structures/src/sorted_map/tests.rs
+++ b/compiler/rustc_data_structures/src/sorted_map/tests.rs
@@ -171,7 +171,7 @@ fn test_insert_presorted_non_overlapping() {
     map.insert(2, 0);
     map.insert(8, 0);
 
-    map.insert_presorted(vec![(3, 0), (7, 0)]);
+    map.insert_presorted(vec![(3, 0), (7, 0)].into_iter());
 
     let expected = vec![2, 3, 7, 8];
     assert_eq!(keys(map), expected);
@@ -183,7 +183,7 @@ fn test_insert_presorted_first_elem_equal() {
     map.insert(2, 2);
     map.insert(8, 8);
 
-    map.insert_presorted(vec![(2, 0), (7, 7)]);
+    map.insert_presorted(vec![(2, 0), (7, 7)].into_iter());
 
     let expected = vec![(2, 0), (7, 7), (8, 8)];
     assert_eq!(elements(map), expected);
@@ -195,7 +195,7 @@ fn test_insert_presorted_last_elem_equal() {
     map.insert(2, 2);
     map.insert(8, 8);
 
-    map.insert_presorted(vec![(3, 3), (8, 0)]);
+    map.insert_presorted(vec![(3, 3), (8, 0)].into_iter());
 
     let expected = vec![(2, 2), (3, 3), (8, 0)];
     assert_eq!(elements(map), expected);
@@ -207,7 +207,7 @@ fn test_insert_presorted_shuffle() {
     map.insert(2, 2);
     map.insert(7, 7);
 
-    map.insert_presorted(vec![(1, 1), (3, 3), (8, 8)]);
+    map.insert_presorted(vec![(1, 1), (3, 3), (8, 8)].into_iter());
 
     let expected = vec![(1, 1), (2, 2), (3, 3), (7, 7), (8, 8)];
     assert_eq!(elements(map), expected);
@@ -219,7 +219,7 @@ fn test_insert_presorted_at_end() {
     map.insert(1, 1);
     map.insert(2, 2);
 
-    map.insert_presorted(vec![(3, 3), (8, 8)]);
+    map.insert_presorted(vec![(3, 3), (8, 8)].into_iter());
 
     let expected = vec![(1, 1), (2, 2), (3, 3), (8, 8)];
     assert_eq!(elements(map), expected);
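For context on the `TrustedLen` bound added to `insert_presorted` above: `Vec::splice` can pre-reserve space based on the incoming iterator's `size_hint`, and a length-trusted iterator makes that hint exact, so the single-splice fast path shifts the existing tail only once. A simplified, standalone sketch of that fast path (illustrative only, not the `SortedMap` code; it skips the overwrite and interleaving cases the real method handles):

    // Insert a presorted run into a sorted Vec in one splice,
    // assuming the whole run fits into a single gap.
    fn insert_presorted_run(data: &mut Vec<(u32, &'static str)>, run: Vec<(u32, &'static str)>) {
        let Some(first) = run.first() else { return };
        // Find the gap where the run starts; this mirrors `lookup_index_for`.
        let index = match data.binary_search_by_key(&first.0, |&(k, _)| k) {
            Ok(i) | Err(i) => i,
        };
        // One splice moves the tail exactly once, instead of once per element.
        data.splice(index..index, run);
    }

    fn main() {
        let mut data = vec![(2, "two"), (8, "eight")];
        insert_presorted_run(&mut data, vec![(3, "three"), (7, "seven")]);
        assert_eq!(data.iter().map(|&(k, _)| k).collect::<Vec<_>>(), vec![2, 3, 7, 8]);
    }
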
diff --git a/compiler/rustc_driver_impl/Cargo.toml b/compiler/rustc_driver_impl/Cargo.toml
index ae1dbd2cf51..46efa50cff3 100644
--- a/compiler/rustc_driver_impl/Cargo.toml
+++ b/compiler/rustc_driver_impl/Cargo.toml
@@ -74,6 +74,7 @@ ctrlc = "3.4.4"
 # tidy-alphabetical-start
 check_only = ['rustc_interface/check_only']
 llvm = ['rustc_interface/llvm']
+llvm_enzyme = ['rustc_interface/llvm_enzyme']
 max_level_info = ['rustc_log/max_level_info']
 rustc_randomized_layouts = [
     'rustc_index/rustc_randomized_layouts',
diff --git a/compiler/rustc_interface/Cargo.toml b/compiler/rustc_interface/Cargo.toml
index 473ac5e0cea..f0836c47740 100644
--- a/compiler/rustc_interface/Cargo.toml
+++ b/compiler/rustc_interface/Cargo.toml
@@ -58,4 +58,5 @@ rustc_abi = { path = "../rustc_abi" }
 # tidy-alphabetical-start
 check_only = ['rustc_codegen_llvm?/check_only']
 llvm = ['dep:rustc_codegen_llvm']
+llvm_enzyme = ['rustc_builtin_macros/llvm_enzyme', 'rustc_codegen_llvm/llvm_enzyme']
 # tidy-alphabetical-end
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 67962813ae4..8e603ce1b91 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -849,8 +849,13 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     ///
     /// This is dangerous to use as it can violate internal `Allocation` invariants!
     /// It only exists to support an efficient implementation of `mem_copy_repeatedly`.
-    pub fn provenance_apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
-        self.provenance.apply_copy(copy)
+    pub fn provenance_apply_copy(
+        &mut self,
+        copy: ProvenanceCopy<Prov>,
+        range: AllocRange,
+        repeat: u64,
+    ) {
+        self.provenance.apply_copy(copy, range, repeat)
     }
 
     /// Applies a previously prepared copy of the init mask.
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
index 720e58d7aa0..67baf63bbfa 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
@@ -278,90 +278,78 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
 
 /// A partial, owned list of provenance to transfer into another allocation.
 ///
-/// Offsets are already adjusted to the destination allocation.
+/// Offsets are relative to the beginning of the copied range.
 pub struct ProvenanceCopy<Prov> {
-    dest_ptrs: Option<Box<[(Size, Prov)]>>,
-    dest_bytes: Option<Box<[(Size, (Prov, u8))]>>,
+    ptrs: Box<[(Size, Prov)]>,
+    bytes: Box<[(Size, (Prov, u8))]>,
 }
 
 impl<Prov: Provenance> ProvenanceMap<Prov> {
     pub fn prepare_copy(
         &self,
-        src: AllocRange,
-        dest: Size,
-        count: u64,
+        range: AllocRange,
         cx: &impl HasDataLayout,
     ) -> AllocResult<ProvenanceCopy<Prov>> {
-        let shift_offset = move |idx, offset| {
-            // compute offset for current repetition
-            let dest_offset = dest + src.size * idx; // `Size` operations
-            // shift offsets from source allocation to destination allocation
-            (offset - src.start) + dest_offset // `Size` operations
-        };
+        let shift_offset = move |offset| offset - range.start;
         let ptr_size = cx.data_layout().pointer_size();
 
         // # Pointer-sized provenances
         // Get the provenances that are entirely within this range.
         // (Different from `range_get_ptrs` which asks if they overlap the range.)
         // Only makes sense if we are copying at least one pointer worth of bytes.
-        let mut dest_ptrs_box = None;
-        if src.size >= ptr_size {
-            let adjusted_end = Size::from_bytes(src.end().bytes() - (ptr_size.bytes() - 1));
-            let ptrs = self.ptrs.range(src.start..adjusted_end);
-            // If `count` is large, this is rather wasteful -- we are allocating a big array here, which
-            // is mostly filled with redundant information since it's just N copies of the same `Prov`s
-            // at slightly adjusted offsets. The reason we do this is so that in `mark_provenance_range`
-            // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces
-            // the right sequence of provenance for all N copies.
-            // Basically, this large array would have to be created anyway in the target allocation.
-            let mut dest_ptrs = Vec::with_capacity(ptrs.len() * (count as usize));
-            for i in 0..count {
-                dest_ptrs
-                    .extend(ptrs.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
-            }
-            debug_assert_eq!(dest_ptrs.len(), dest_ptrs.capacity());
-            dest_ptrs_box = Some(dest_ptrs.into_boxed_slice());
+        let mut ptrs_box: Box<[_]> = Box::new([]);
+        if range.size >= ptr_size {
+            let adjusted_end = Size::from_bytes(range.end().bytes() - (ptr_size.bytes() - 1));
+            let ptrs = self.ptrs.range(range.start..adjusted_end);
+            ptrs_box = ptrs.iter().map(|&(offset, reloc)| (shift_offset(offset), reloc)).collect();
         };
 
         // # Byte-sized provenances
         // This includes the existing bytewise provenance in the range, and ptr provenance
         // that overlaps with the begin/end of the range.
-        let mut dest_bytes_box = None;
-        let begin_overlap = self.range_ptrs_get(alloc_range(src.start, Size::ZERO), cx).first();
-        let end_overlap = self.range_ptrs_get(alloc_range(src.end(), Size::ZERO), cx).first();
+        let mut bytes_box: Box<[_]> = Box::new([]);
+        let begin_overlap = self.range_ptrs_get(alloc_range(range.start, Size::ZERO), cx).first();
+        let end_overlap = self.range_ptrs_get(alloc_range(range.end(), Size::ZERO), cx).first();
         // We only need to go here if there is some overlap or some bytewise provenance.
         if begin_overlap.is_some() || end_overlap.is_some() || self.bytes.is_some() {
             let mut bytes: Vec<(Size, (Prov, u8))> = Vec::new();
             // First, if there is a part of a pointer at the start, add that.
             if let Some(entry) = begin_overlap {
                 trace!("start overlapping entry: {entry:?}");
-                // For really small copies, make sure we don't run off the end of the `src` range.
-                let entry_end = cmp::min(entry.0 + ptr_size, src.end());
-                for offset in src.start..entry_end {
-                    bytes.push((offset, (entry.1, (offset - entry.0).bytes() as u8)));
+                // For really small copies, make sure we don't run off the end of the range.
+                let entry_end = cmp::min(entry.0 + ptr_size, range.end());
+                for offset in range.start..entry_end {
+                    bytes.push((shift_offset(offset), (entry.1, (offset - entry.0).bytes() as u8)));
                 }
             } else {
                 trace!("no start overlapping entry");
             }
 
             // Then the main part, bytewise provenance from `self.bytes`.
-            bytes.extend(self.range_bytes_get(src));
+            bytes.extend(
+                self.range_bytes_get(range)
+                    .iter()
+                    .map(|&(offset, reloc)| (shift_offset(offset), reloc)),
+            );
 
             // And finally possibly parts of a pointer at the end.
             if let Some(entry) = end_overlap {
                 trace!("end overlapping entry: {entry:?}");
-                // For really small copies, make sure we don't start before `src` does.
-                let entry_start = cmp::max(entry.0, src.start);
-                for offset in entry_start..src.end() {
+                // For really small copies, make sure we don't start before `range` does.
+                let entry_start = cmp::max(entry.0, range.start);
+                for offset in entry_start..range.end() {
                     if bytes.last().is_none_or(|bytes_entry| bytes_entry.0 < offset) {
                         // The last entry, if it exists, has a lower offset than us, so we
                         // can add it at the end and remain sorted.
-                        bytes.push((offset, (entry.1, (offset - entry.0).bytes() as u8)));
+                        bytes.push((
+                            shift_offset(offset),
+                            (entry.1, (offset - entry.0).bytes() as u8),
+                        ));
                     } else {
                         // There already is an entry for this offset in there! This can happen when the
                         // start and end range checks actually end up hitting the same pointer, so we
                         // already added this in the "pointer at the start" part above.
-                        assert!(entry.0 <= src.start);
+                        assert!(entry.0 <= range.start);
                     }
                 }
             } else {
@@ -372,33 +360,40 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
             if !bytes.is_empty() && !Prov::OFFSET_IS_ADDR {
                 // FIXME(#146291): We need to ensure that we don't mix different pointers with
                 // the same provenance.
-                return Err(AllocError::ReadPartialPointer(src.start));
+                return Err(AllocError::ReadPartialPointer(range.start));
             }
 
             // And again a buffer for the new list on the target side.
-            let mut dest_bytes = Vec::with_capacity(bytes.len() * (count as usize));
-            for i in 0..count {
-                dest_bytes
-                    .extend(bytes.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
-            }
-            debug_assert_eq!(dest_bytes.len(), dest_bytes.capacity());
-            dest_bytes_box = Some(dest_bytes.into_boxed_slice());
+            bytes_box = bytes.into_boxed_slice();
         }
 
-        Ok(ProvenanceCopy { dest_ptrs: dest_ptrs_box, dest_bytes: dest_bytes_box })
+        Ok(ProvenanceCopy { ptrs: ptrs_box, bytes: bytes_box })
     }
 
     /// Applies a provenance copy.
     /// The affected range, as defined in the parameters to `prepare_copy` is expected
     /// to be clear of provenance.
-    pub fn apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
-        if let Some(dest_ptrs) = copy.dest_ptrs {
-            self.ptrs.insert_presorted(dest_ptrs.into());
+    pub fn apply_copy(&mut self, copy: ProvenanceCopy<Prov>, range: AllocRange, repeat: u64) {
+        let shift_offset = |idx: u64, offset: Size| offset + range.start + idx * range.size;
+        if !copy.ptrs.is_empty() {
+            // We want to call `insert_presorted` only once so that, if possible, the entries
+            // after the range we insert are moved back only once.
+            let chunk_len = copy.ptrs.len() as u64;
+            self.ptrs.insert_presorted((0..chunk_len * repeat).map(|i| {
+                let chunk = i / chunk_len;
+                let (offset, reloc) = copy.ptrs[(i % chunk_len) as usize];
+                (shift_offset(chunk, offset), reloc)
+            }));
         }
-        if let Some(dest_bytes) = copy.dest_bytes
-            && !dest_bytes.is_empty()
-        {
-            self.bytes.get_or_insert_with(Box::default).insert_presorted(dest_bytes.into());
+        if !copy.bytes.is_empty() {
+            let chunk_len = copy.bytes.len() as u64;
+            self.bytes.get_or_insert_with(Box::default).insert_presorted(
+                (0..chunk_len * repeat).map(|i| {
+                    let chunk = i / chunk_len;
+                    let (offset, reloc) = copy.bytes[(i % chunk_len) as usize];
+                    (shift_offset(chunk, offset), reloc)
+                }),
+            );
        }
    }
}
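The new `apply_copy` above expands one prepared chunk into `repeat` shifted copies on the fly, instead of `prepare_copy` materializing the full N-fold array as the removed comment block used to justify. A self-contained sketch of that index arithmetic with plain integers (illustrative only; the real code works with `Size`, `Prov`, and `insert_presorted`):

    // Expand one chunk of range-relative (offset, payload) pairs into `repeat`
    // shifted copies, lazily, so only the final map ever holds the full data.
    fn expand_repeats(
        chunk: &[(u64, char)], // offsets relative to the copied range
        dest_start: u64,       // start of the destination range
        range_size: u64,       // size of one copy
        repeat: u64,
    ) -> impl Iterator<Item = (u64, char)> + '_ {
        let chunk_len = chunk.len() as u64;
        (0..chunk_len * repeat).map(move |i| {
            let nth_copy = i / chunk_len;
            let (offset, payload) = chunk[(i % chunk_len) as usize];
            (dest_start + nth_copy * range_size + offset, payload)
        })
    }

    fn main() {
        // Two entries in a 16-byte range, copied 3 times to destination offset 32.
        let chunk = [(0, 'a'), (8, 'b')];
        let expanded: Vec<_> = expand_repeats(&chunk, 32, 16, 3).collect();
        assert_eq!(
            expanded,
            vec![(32, 'a'), (40, 'b'), (48, 'a'), (56, 'b'), (64, 'a'), (72, 'b')]
        );
        // The expanded sequence stays sorted by offset, so a single
        // `insert_presorted`-style call per map is enough.
    }
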
