From 16e869a678312b16c2e97461f144ce63ef5309bc Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 13:50:39 +0100 Subject: interpret: pass Size and Align to before_memory_deallocation --- src/tools/miri/src/borrow_tracker/mod.rs | 6 +++--- src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs | 6 +++--- src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs | 4 ++-- src/tools/miri/src/concurrency/data_race.rs | 4 ++-- src/tools/miri/src/machine.rs | 7 ++++--- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'src/tools') diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs index 262f7c449d2..8d76a488269 100644 --- a/src/tools/miri/src/borrow_tracker/mod.rs +++ b/src/tools/miri/src/borrow_tracker/mod.rs @@ -472,14 +472,14 @@ impl AllocState { &mut self, alloc_id: AllocId, prov_extra: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { match self { AllocState::StackedBorrows(sb) => - sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine), + sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), AllocState::TreeBorrows(tb) => - tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine), + tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), } } diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index 86d22229714..7ca2bc76f1e 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -574,13 +574,13 @@ impl Stacks { &mut self, alloc_id: AllocId, tag: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { - trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, range.size.bytes()); + trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, size.bytes()); let dcx = DiagnosticCxBuilder::dealloc(machine, tag); let state = machine.borrow_tracker.as_ref().unwrap().borrow(); - self.for_each(range, dcx, |stack, dcx, exposed_tags| { + self.for_each(alloc_range(Size::ZERO, size), dcx, |stack, dcx, exposed_tags| { stack.dealloc(tag, &state, dcx, exposed_tags) })?; Ok(()) diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 4b944ea88f5..9dd147af424 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -80,7 +80,7 @@ impl<'tcx> Tree { &mut self, alloc_id: AllocId, prov: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { // TODO: for now we bail out on wildcard pointers. 
Eventually we should @@ -91,7 +91,7 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); - self.dealloc(tag, range, global, alloc_id, span) + self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span) } pub fn expose_tag(&mut self, _tag: BorTag) { diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index e3044884083..4a1c3ac868e 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -1071,10 +1071,10 @@ impl VClockAlloc { pub fn deallocate<'tcx>( &mut self, alloc_id: AllocId, - range: AllocRange, + size: Size, machine: &mut MiriMachine<'_, '_>, ) -> InterpResult<'tcx> { - self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine) + self.unique_access(alloc_id, alloc_range(Size::ZERO, size), NaWriteType::Deallocate, machine) } } diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index c3c3a815856..fda7e6f4449 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1288,16 +1288,17 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { machine: &mut Self, alloc_extra: &mut AllocExtra<'tcx>, (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra), - range: AllocRange, + size: Size, + _align: Align, ) -> InterpResult<'tcx> { if machine.tracked_alloc_ids.contains(&alloc_id) { machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id)); } if let Some(data_race) = &mut alloc_extra.data_race { - data_race.deallocate(alloc_id, range, machine)?; + data_race.deallocate(alloc_id, size, machine)?; } if let Some(borrow_tracker) = &mut alloc_extra.borrow_tracker { - borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, range, machine)?; + borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, size, machine)?; } if let Some((_, deallocated_at)) = machine.allocation_spans.borrow_mut().get_mut(&alloc_id) { -- cgit 1.4.1-3-g733a5 From 092bd53c1ef96ede03bb50ecefc15c6a365531f4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 13:20:51 +0100 Subject: rename intptrcast -> alloc_addresses, and make a folder for it --- src/tools/miri/src/alloc_addresses/mod.rs | 337 ++++++++++++++++++++++++++++++ src/tools/miri/src/intptrcast.rs | 334 ----------------------------- src/tools/miri/src/lib.rs | 4 +- src/tools/miri/src/machine.rs | 10 +- src/tools/miri/src/provenance_gc.rs | 2 +- 5 files changed, 345 insertions(+), 342 deletions(-) create mode 100644 src/tools/miri/src/alloc_addresses/mod.rs delete mode 100644 src/tools/miri/src/intptrcast.rs (limited to 'src/tools') diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs new file mode 100644 index 00000000000..3177a1297c8 --- /dev/null +++ b/src/tools/miri/src/alloc_addresses/mod.rs @@ -0,0 +1,337 @@ +//! This module is responsible for managing the absolute addresses that allocations are located at, +//! and for casting between pointers and integers based on those addresses. + +use std::cell::RefCell; +use std::cmp::max; +use std::collections::hash_map::Entry; + +use rand::Rng; + +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_span::Span; +use rustc_target::abi::{HasDataLayout, Size}; + +use crate::*; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ProvenanceMode { + /// We support `expose_addr`/`from_exposed_addr` via "wildcard" provenance. 
+    /// However, we warn on `from_exposed_addr` to alert the user of the precision loss.
+    Default,
+    /// Like `Default`, but without the warning.
+    Permissive,
+    /// We error on `from_exposed_addr`, ensuring no precision loss.
+    Strict,
+}
+
+pub type GlobalState = RefCell<GlobalStateInner>;
+
+#[derive(Clone, Debug)]
+pub struct GlobalStateInner {
+    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
+    /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
+    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
+    /// *full* inverse of `base_addr`; dead allocations have been removed.
+    int_to_ptr_map: Vec<(u64, AllocId)>,
+    /// The base address for each allocation. We cannot put that into
+    /// `AllocExtra` because function pointers also have a base address, and
+    /// they do not have an `AllocExtra`.
+    /// This is the inverse of `int_to_ptr_map`.
+    base_addr: FxHashMap<AllocId, u64>,
+    /// Whether an allocation has been exposed or not. This cannot be put
+    /// into `AllocExtra` for the same reason as `base_addr`.
+    exposed: FxHashSet<AllocId>,
+    /// This is used as a memory address when a new pointer is cast to an integer. It
+    /// is always larger than any address that was previously made part of a block.
+    next_base_addr: u64,
+    /// The provenance to use for int2ptr casts.
+    provenance_mode: ProvenanceMode,
+}
+
+impl VisitProvenance for GlobalStateInner {
+    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
+        let GlobalStateInner {
+            int_to_ptr_map: _,
+            base_addr: _,
+            exposed: _,
+            next_base_addr: _,
+            provenance_mode: _,
+        } = self;
+        // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
+        // int_to_ptr_map and exposed must contain only live allocations, and those
+        // are never garbage collected.
+        // base_addr is only relevant if we have a pointer to an AllocId and need to look up its
+        // base address; so if an AllocId is not reachable from somewhere else we can remove it
+        // here.
+    }
+}
+
+impl GlobalStateInner {
+    pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
+        GlobalStateInner {
+            int_to_ptr_map: Vec::default(),
+            base_addr: FxHashMap::default(),
+            exposed: FxHashSet::default(),
+            next_base_addr: stack_addr,
+            provenance_mode: config.provenance_mode,
+        }
+    }
+
+    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_, '_>) {
+        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
+        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
+        self.base_addr.retain(|id, _| allocs.is_live(*id));
+    }
+}
+
+/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
+/// of `align` that is larger than or equal to `addr`.
+fn align_addr(addr: u64, align: u64) -> u64 {
+    match addr % align {
+        0 => addr,
+        rem => addr.checked_add(align).unwrap() - rem,
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+    // Returns the exposed `AllocId` that corresponds to the specified addr,
+    // or `None` if the addr is out of bounds.
+    fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
+        let ecx = self.eval_context_ref();
+        let global_state = ecx.machine.alloc_addresses.borrow();
+        assert!(global_state.provenance_mode != ProvenanceMode::Strict);
+
+        let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
+
+        // Determine the in-bounds provenance for this pointer.
+        // (This is only called on an actual access, so in-bounds is the only possible kind of provenance.)
+        let alloc_id = match pos {
+            Ok(pos) => Some(global_state.int_to_ptr_map[pos].1),
+            Err(0) => None,
+            Err(pos) => {
+                // This is the largest of the addresses smaller than `int`,
+                // i.e. the greatest lower bound (glb)
+                let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
+                // This never overflows because `addr >= glb`
+                let offset = addr - glb;
+                // We require this to be strict in-bounds of the allocation. This arm is only
+                // entered for addresses that are not the base address, so even zero-sized
+                // allocations will get recognized at their base address -- but all other
+                // allocations will *not* be recognized at their "end" address.
+                let size = ecx.get_alloc_info(alloc_id).0;
+                if offset < size.bytes() { Some(alloc_id) } else { None }
+            }
+        }?;
+
+        // We only use this provenance if it has been exposed.
+        if global_state.exposed.contains(&alloc_id) {
+            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
+            debug_assert!(ecx.is_alloc_live(alloc_id));
+            Some(alloc_id)
+        } else {
+            None
+        }
+    }
+
+    fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
+        let ecx = self.eval_context_ref();
+        let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
+        let global_state = &mut *global_state;
+
+        Ok(match global_state.base_addr.entry(alloc_id) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let (size, align, kind) = ecx.get_alloc_info(alloc_id);
+                // This is either called immediately after allocation (and then cached), or when
+                // adjusting `tcx` pointers (which never get freed). So assert that we are looking
+                // at a live allocation. This also ensures that we never re-assign an address to an
+                // allocation that previously had an address, but then was freed and the address
+                // information was removed.
+                assert!(!matches!(kind, AllocKind::Dead));
+
+                // This allocation does not have a base address yet, pick one.
+                // Leave some space to the previous allocation, to give it some chance to be less aligned.
+                let slack = {
+                    let mut rng = ecx.machine.rng.borrow_mut();
+                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+                    rng.gen_range(0..16)
+                };
+                // From next_base_addr + slack, round up to adjust for alignment.
+                let base_addr = global_state
+                    .next_base_addr
+                    .checked_add(slack)
+                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                let base_addr = align_addr(base_addr, align.bytes());
+                entry.insert(base_addr);
+                trace!(
+                    "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
+                    base_addr,
+                    alloc_id,
+                    size.bytes(),
+                    align.bytes(),
+                    slack,
+                );
+
+                // Remember next base address. If this allocation is zero-sized, leave a gap
+                // of at least 1 to avoid two allocations having the same base address.
+                // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+                // function/vtable pointers need to be distinguishable!)
+                global_state.next_base_addr = base_addr
+                    .checked_add(max(size.bytes(), 1))
+                    .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                // Even if `Size` didn't overflow, we might still have filled up the address space.
+                if global_state.next_base_addr > ecx.target_usize_max() {
+                    throw_exhaust!(AddressSpaceFull);
+                }
+                // Also maintain the opposite mapping in `int_to_ptr_map`.
+                // Given that `next_base_addr` increases in each allocation, pushing the
+                // corresponding tuple keeps `int_to_ptr_map` sorted.
+                global_state.int_to_ptr_map.push((base_addr, alloc_id));
+
+                base_addr
+            }
+        })
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+    fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
+        let ecx = self.eval_context_mut();
+        let global_state = ecx.machine.alloc_addresses.get_mut();
+        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
+        if global_state.provenance_mode == ProvenanceMode::Strict {
+            return Ok(());
+        }
+        // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
+        // via int2ptr.
+        if !ecx.is_alloc_live(alloc_id) {
+            return Ok(());
+        }
+        trace!("Exposing allocation id {alloc_id:?}");
+        let global_state = ecx.machine.alloc_addresses.get_mut();
+        global_state.exposed.insert(alloc_id);
+        if ecx.machine.borrow_tracker.is_some() {
+            ecx.expose_tag(alloc_id, tag)?;
+        }
+        Ok(())
+    }
+
+    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
+        trace!("Casting {:#x} to a pointer", addr);
+
+        let ecx = self.eval_context_ref();
+        let global_state = ecx.machine.alloc_addresses.borrow();
+
+        // Potentially emit a warning.
+        match global_state.provenance_mode {
+            ProvenanceMode::Default => {
+                // The first time this happens at a particular location, print a warning.
+                thread_local! {
+                    // `Span` is non-`Send`, so we use a thread-local instead.
+                    static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
+                }
+                PAST_WARNINGS.with_borrow_mut(|past_warnings| {
+                    let first = past_warnings.is_empty();
+                    if past_warnings.insert(ecx.cur_span()) {
+                        // Newly inserted, so first time we see this span.
+                        ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
+                    }
+                });
+            }
+            ProvenanceMode::Strict => {
+                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
+            }
+            ProvenanceMode::Permissive => {}
+        }
+
+        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
+        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
+        // *then* do a memory access. So the allocation that the pointer happens to point to on a
+        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
+        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
+        // allocation it might be referencing.
+        Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
+    }
+
+    /// Convert a relative (tcx) pointer to a Miri pointer.
+    fn ptr_from_rel_ptr(
+        &self,
+        ptr: Pointer<AllocId>,
+        tag: BorTag,
+    ) -> InterpResult<'tcx, Pointer<Provenance>> {
+        let ecx = self.eval_context_ref();
+
+        let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
+        let alloc_id = prov.alloc_id();
+        let base_addr = ecx.addr_from_alloc_id(alloc_id)?;
+
+        // Add offset with the right kind of pointer-overflowing arithmetic.
+        let dl = ecx.data_layout();
+        let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0;
+        Ok(Pointer::new(Provenance::Concrete { alloc_id, tag }, Size::from_bytes(absolute_addr)))
+    }
+
+    /// When a pointer is used for a memory access, this computes where, and in which allocation,
+    /// the access is going.
+    fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
+        let ecx = self.eval_context_ref();
+
+        let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
+
+        let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
+            alloc_id
+        } else {
+            // A wildcard pointer.
+            ecx.alloc_id_from_addr(addr.bytes())?
+        };
+
+        // This cannot fail: since we already have a pointer with that provenance, `ptr_from_rel_ptr`
+        // must have been called in the past, so we can just look up the address in the map.
+        let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();
+
+        // Wrapping "addr - base_addr"
+        #[allow(clippy::cast_possible_wrap)] // we want to wrap here
+        let neg_base_addr = (base_addr as i64).wrapping_neg();
+        Some((
+            alloc_id,
+            Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
+        ))
+    }
+}
+
+impl GlobalStateInner {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
+        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
+        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
+        // access to determine the allocation ID and offset -- and there can still be pointers with
+        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
+        // `None` only if the pointer truly has no provenance (this ensures consistent error
+        // messages).
+        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
+        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
+        // returns a dead allocation.
+        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
+        // `int_to_ptr_map`.
+        let addr = *self.base_addr.get(&dead_id).unwrap();
+        let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
+        let removed = self.int_to_ptr_map.remove(pos);
+        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
+        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
+        // `alloc_id_from_addr` any more.
+ self.exposed.remove(&dead_id); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_align_addr() { + assert_eq!(align_addr(37, 4), 40); + assert_eq!(align_addr(44, 4), 44); + } +} diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs deleted file mode 100644 index 3fe127f9732..00000000000 --- a/src/tools/miri/src/intptrcast.rs +++ /dev/null @@ -1,334 +0,0 @@ -use std::cell::RefCell; -use std::cmp::max; -use std::collections::hash_map::Entry; - -use rand::Rng; - -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_span::Span; -use rustc_target::abi::{HasDataLayout, Size}; - -use crate::*; - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum ProvenanceMode { - /// We support `expose_addr`/`from_exposed_addr` via "wildcard" provenance. - /// However, we want on `from_exposed_addr` to alert the user of the precision loss. - Default, - /// Like `Default`, but without the warning. - Permissive, - /// We error on `from_exposed_addr`, ensuring no precision loss. - Strict, -} - -pub type GlobalState = RefCell; - -#[derive(Clone, Debug)] -pub struct GlobalStateInner { - /// This is used as a map between the address of each allocation and its `AllocId`. It is always - /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset - /// from the base address, and we need to find the `AllocId` it belongs to. This is not the - /// *full* inverse of `base_addr`; dead allocations have been removed. - int_to_ptr_map: Vec<(u64, AllocId)>, - /// The base address for each allocation. We cannot put that into - /// `AllocExtra` because function pointers also have a base address, and - /// they do not have an `AllocExtra`. - /// This is the inverse of `int_to_ptr_map`. - base_addr: FxHashMap, - /// Whether an allocation has been exposed or not. This cannot be put - /// into `AllocExtra` for the same reason as `base_addr`. - exposed: FxHashSet, - /// This is used as a memory address when a new pointer is casted to an integer. It - /// is always larger than any address that was previously made part of a block. - next_base_addr: u64, - /// The provenance to use for int2ptr casts - provenance_mode: ProvenanceMode, -} - -impl VisitProvenance for GlobalStateInner { - fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { - let GlobalStateInner { - int_to_ptr_map: _, - base_addr: _, - exposed: _, - next_base_addr: _, - provenance_mode: _, - } = self; - // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them. - // int_to_ptr_map and exposed must contain only live allocations, and those - // are never garbage collected. - // base_addr is only relevant if we have a pointer to an AllocId and need to look up its - // base address; so if an AllocId is not reachable from somewhere else we can remove it - // here. - } -} - -impl GlobalStateInner { - pub fn new(config: &MiriConfig, stack_addr: u64) -> Self { - GlobalStateInner { - int_to_ptr_map: Vec::default(), - base_addr: FxHashMap::default(), - exposed: FxHashSet::default(), - next_base_addr: stack_addr, - provenance_mode: config.provenance_mode, - } - } - - pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_, '_>) { - // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation - // is freed, so `base_addr` is the only one we have to clean up based on the GC. 
- self.base_addr.retain(|id, _| allocs.is_live(*id)); - } -} - -/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple -/// of `align` that is larger or equal to `addr` -fn align_addr(addr: u64, align: u64) -> u64 { - match addr % align { - 0 => addr, - rem => addr.checked_add(align).unwrap() - rem, - } -} - -impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} -trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { - // Returns the exposed `AllocId` that corresponds to the specified addr, - // or `None` if the addr is out of bounds - fn alloc_id_from_addr(&self, addr: u64) -> Option { - let ecx = self.eval_context_ref(); - let global_state = ecx.machine.intptrcast.borrow(); - assert!(global_state.provenance_mode != ProvenanceMode::Strict); - - let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr); - - // Determine the in-bounds provenance for this pointer. - // (This is only called on an actual access, so in-bounds is the only possible kind of provenance.) - let alloc_id = match pos { - Ok(pos) => Some(global_state.int_to_ptr_map[pos].1), - Err(0) => None, - Err(pos) => { - // This is the largest of the addresses smaller than `int`, - // i.e. the greatest lower bound (glb) - let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1]; - // This never overflows because `addr >= glb` - let offset = addr - glb; - // We require this to be strict in-bounds of the allocation. This arm is only - // entered for addresses that are not the base address, so even zero-sized - // allocations will get recognized at their base address -- but all other - // allocations will *not* be recognized at their "end" address. - let size = ecx.get_alloc_info(alloc_id).0; - if offset < size.bytes() { Some(alloc_id) } else { None } - } - }?; - - // We only use this provenance if it has been exposed. - if global_state.exposed.contains(&alloc_id) { - // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed. - debug_assert!(ecx.is_alloc_live(alloc_id)); - Some(alloc_id) - } else { - None - } - } - - fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> { - let ecx = self.eval_context_ref(); - let mut global_state = ecx.machine.intptrcast.borrow_mut(); - let global_state = &mut *global_state; - - Ok(match global_state.base_addr.entry(alloc_id) { - Entry::Occupied(entry) => *entry.get(), - Entry::Vacant(entry) => { - let (size, align, kind) = ecx.get_alloc_info(alloc_id); - // This is either called immediately after allocation (and then cached), or when - // adjusting `tcx` pointers (which never get freed). So assert that we are looking - // at a live allocation. This also ensures that we never re-assign an address to an - // allocation that previously had an address, but then was freed and the address - // information was removed. - assert!(!matches!(kind, AllocKind::Dead)); - - // This allocation does not have a base address yet, pick one. - // Leave some space to the previous allocation, to give it some chance to be less aligned. - let slack = { - let mut rng = ecx.machine.rng.borrow_mut(); - // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed. - rng.gen_range(0..16) - }; - // From next_base_addr + slack, round up to adjust for alignment. 
- let base_addr = global_state - .next_base_addr - .checked_add(slack) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - let base_addr = align_addr(base_addr, align.bytes()); - entry.insert(base_addr); - trace!( - "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})", - base_addr, - alloc_id, - size.bytes(), - align.bytes(), - slack, - ); - - // Remember next base address. If this allocation is zero-sized, leave a gap - // of at least 1 to avoid two allocations having the same base address. - // (The logic in `alloc_id_from_addr` assumes unique addresses, and different - // function/vtable pointers need to be distinguishable!) - global_state.next_base_addr = base_addr - .checked_add(max(size.bytes(), 1)) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - // Even if `Size` didn't overflow, we might still have filled up the address space. - if global_state.next_base_addr > ecx.target_usize_max() { - throw_exhaust!(AddressSpaceFull); - } - // Also maintain the opposite mapping in `int_to_ptr_map`. - // Given that `next_base_addr` increases in each allocation, pushing the - // corresponding tuple keeps `int_to_ptr_map` sorted - global_state.int_to_ptr_map.push((base_addr, alloc_id)); - - base_addr - } - }) - } -} - -impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {} -pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { - fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> { - let ecx = self.eval_context_mut(); - let global_state = ecx.machine.intptrcast.get_mut(); - // In strict mode, we don't need this, so we can save some cycles by not tracking it. - if global_state.provenance_mode == ProvenanceMode::Strict { - return Ok(()); - } - // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation - // via int2ptr. - if !ecx.is_alloc_live(alloc_id) { - return Ok(()); - } - trace!("Exposing allocation id {alloc_id:?}"); - let global_state = ecx.machine.intptrcast.get_mut(); - global_state.exposed.insert(alloc_id); - if ecx.machine.borrow_tracker.is_some() { - ecx.expose_tag(alloc_id, tag)?; - } - Ok(()) - } - - fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer>> { - trace!("Casting {:#x} to a pointer", addr); - - let ecx = self.eval_context_ref(); - let global_state = ecx.machine.intptrcast.borrow(); - - // Potentially emit a warning. - match global_state.provenance_mode { - ProvenanceMode::Default => { - // The first time this happens at a particular location, print a warning. - thread_local! { - // `Span` is non-`Send`, so we use a thread-local instead. - static PAST_WARNINGS: RefCell> = RefCell::default(); - } - PAST_WARNINGS.with_borrow_mut(|past_warnings| { - let first = past_warnings.is_empty(); - if past_warnings.insert(ecx.cur_span()) { - // Newly inserted, so first time we see this span. - ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first }); - } - }); - } - ProvenanceMode::Strict => { - throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance); - } - ProvenanceMode::Permissive => {} - } - - // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is - // completely legal to do a cast and then `wrapping_offset` to another allocation and only - // *then* do a memory access. So the allocation that the pointer happens to point to on a - // cast is fairly irrelevant. 
Instead we generate this as a "wildcard" pointer, such that - // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed) - // allocation it might be referencing. - Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr))) - } - - /// Convert a relative (tcx) pointer to a Miri pointer. - fn ptr_from_rel_ptr( - &self, - ptr: Pointer, - tag: BorTag, - ) -> InterpResult<'tcx, Pointer> { - let ecx = self.eval_context_ref(); - - let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance) - let alloc_id = prov.alloc_id(); - let base_addr = ecx.addr_from_alloc_id(alloc_id)?; - - // Add offset with the right kind of pointer-overflowing arithmetic. - let dl = ecx.data_layout(); - let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0; - Ok(Pointer::new(Provenance::Concrete { alloc_id, tag }, Size::from_bytes(absolute_addr))) - } - - /// When a pointer is used for a memory access, this computes where in which allocation the - /// access is going. - fn ptr_get_alloc(&self, ptr: Pointer) -> Option<(AllocId, Size)> { - let ecx = self.eval_context_ref(); - - let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance) - - let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag { - alloc_id - } else { - // A wildcard pointer. - ecx.alloc_id_from_addr(addr.bytes())? - }; - - // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr - // must have been called in the past, so we can just look up the address in the map. - let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap(); - - // Wrapping "addr - base_addr" - #[allow(clippy::cast_possible_wrap)] // we want to wrap here - let neg_base_addr = (base_addr as i64).wrapping_neg(); - Some(( - alloc_id, - Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), - )) - } -} - -impl GlobalStateInner { - pub fn free_alloc_id(&mut self, dead_id: AllocId) { - // We can *not* remove this from `base_addr`, since the interpreter design requires that we - // be able to retrieve an AllocId + offset for any memory access *before* we check if the - // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory - // access to determine the allocation ID and offset -- and there can still be pointers with - // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return - // `None` only if the pointer truly has no provenance (this ensures consistent error - // messages). - // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist - // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never - // returns a dead allocation. - // To avoid a linear scan we first look up the address in `base_addr`, and then find it in - // `int_to_ptr_map`. - let addr = *self.base_addr.get(&dead_id).unwrap(); - let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap(); - let removed = self.int_to_ptr_map.remove(pos); - assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing - // We can also remove it from `exposed`, since this allocation can anyway not be returned by - // `alloc_id_from_addr` any more. 
- self.exposed.remove(&dead_id); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_align_addr() { - assert_eq!(align_addr(37, 4), 40); - assert_eq!(align_addr(44, 4), 44); - } -} diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index e1d0bc1c183..819952d09e1 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -71,13 +71,13 @@ extern crate tracing; #[allow(unused_extern_crates)] extern crate rustc_driver; +mod alloc_addresses; mod borrow_tracker; mod clock; mod concurrency; mod diagnostics; mod eval; mod helpers; -mod intptrcast; mod machine; mod mono_hash_map; mod operator; @@ -100,6 +100,7 @@ pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _}; pub use crate::shims::time::EvalContextExt as _; pub use crate::shims::tls::TlsData; +pub use crate::alloc_addresses::{EvalContextExt as _, ProvenanceMode}; pub use crate::borrow_tracker::stacked_borrows::{ EvalContextExt as _, Item, Permission, Stack, Stacks, }; @@ -121,7 +122,6 @@ pub use crate::eval::{ create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith, }; pub use crate::helpers::{AccessKind, EvalContextExt as _}; -pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode}; pub use crate::machine::{ AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index fda7e6f4449..3a54629222d 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -436,7 +436,7 @@ pub struct MiriMachine<'mir, 'tcx> { pub data_race: Option, /// Ptr-int-cast module global data. - pub intptrcast: intptrcast::GlobalState, + pub alloc_addresses: alloc_addresses::GlobalState, /// Environment variables set by `setenv`. /// Miri does not expose env vars from the host to the emulated program. @@ -634,7 +634,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> { tcx, borrow_tracker, data_race, - intptrcast: RefCell::new(intptrcast::GlobalStateInner::new(config, stack_addr)), + alloc_addresses: RefCell::new(alloc_addresses::GlobalStateInner::new(config, stack_addr)), // `env_vars` depends on a full interpreter so we cannot properly initialize it yet. 
env_vars: EnvVars::default(), main_fn_ret_place: None, @@ -782,7 +782,7 @@ impl VisitProvenance for MiriMachine<'_, '_> { dir_handler, borrow_tracker, data_race, - intptrcast, + alloc_addresses, file_handler, tcx: _, isolated_op: _, @@ -827,7 +827,7 @@ impl VisitProvenance for MiriMachine<'_, '_> { file_handler.visit_provenance(visit); data_race.visit_provenance(visit); borrow_tracker.visit_provenance(visit); - intptrcast.visit_provenance(visit); + alloc_addresses.visit_provenance(visit); main_fn_ret_place.visit_provenance(visit); argc.visit_provenance(visit); argv.visit_provenance(visit); @@ -1304,7 +1304,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { { *deallocated_at = Some(machine.current_span()); } - machine.intptrcast.get_mut().free_alloc_id(alloc_id); + machine.alloc_addresses.get_mut().free_alloc_id(alloc_id); Ok(()) } diff --git a/src/tools/miri/src/provenance_gc.rs b/src/tools/miri/src/provenance_gc.rs index 347951ce372..f23d7dfd52d 100644 --- a/src/tools/miri/src/provenance_gc.rs +++ b/src/tools/miri/src/provenance_gc.rs @@ -197,7 +197,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> { let allocs = LiveAllocs { ecx: this, collected: allocs }; this.machine.allocation_spans.borrow_mut().retain(|id, _| allocs.is_live(*id)); this.machine.symbolic_alignment.borrow_mut().retain(|id, _| allocs.is_live(*id)); - this.machine.intptrcast.borrow_mut().remove_unreachable_allocs(&allocs); + this.machine.alloc_addresses.borrow_mut().remove_unreachable_allocs(&allocs); if let Some(borrow_tracker) = &this.machine.borrow_tracker { borrow_tracker.borrow_mut().remove_unreachable_allocs(&allocs); } -- cgit 1.4.1-3-g733a5 From c01676835125747b5fff3143ca2a71d2bf7b94c4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 16:05:37 +0100 Subject: miri: add some chance to reuse addresses of previously freed allocations --- src/tools/miri/src/alloc_addresses/mod.rs | 101 +++++++++++++++-------- src/tools/miri/src/alloc_addresses/reuse_pool.rs | 87 +++++++++++++++++++ src/tools/miri/src/helpers.rs | 4 +- src/tools/miri/src/machine.rs | 9 +- src/tools/miri/tests/pass/address-reuse.rs | 16 ++++ src/tools/miri/tests/pass/intptrcast.rs | 4 +- 6 files changed, 182 insertions(+), 39 deletions(-) create mode 100644 src/tools/miri/src/alloc_addresses/reuse_pool.rs create mode 100644 src/tools/miri/tests/pass/address-reuse.rs (limited to 'src/tools') diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs index 3177a1297c8..e1714aa9e46 100644 --- a/src/tools/miri/src/alloc_addresses/mod.rs +++ b/src/tools/miri/src/alloc_addresses/mod.rs @@ -1,6 +1,8 @@ //! This module is responsible for managing the absolute addresses that allocations are located at, //! and for casting between pointers and integers based on those addresses. +mod reuse_pool; + use std::cell::RefCell; use std::cmp::max; use std::collections::hash_map::Entry; @@ -9,9 +11,10 @@ use rand::Rng; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_span::Span; -use rustc_target::abi::{HasDataLayout, Size}; +use rustc_target::abi::{Align, HasDataLayout, Size}; use crate::*; +use reuse_pool::ReusePool; #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum ProvenanceMode { @@ -26,7 +29,7 @@ pub enum ProvenanceMode { pub type GlobalState = RefCell; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct GlobalStateInner { /// This is used as a map between the address of each allocation and its `AllocId`. It is always /// sorted by address. 
We cannot use a `HashMap` since we can be given an address that is offset @@ -38,6 +41,8 @@ pub struct GlobalStateInner { /// they do not have an `AllocExtra`. /// This is the inverse of `int_to_ptr_map`. base_addr: FxHashMap, + /// A pool of addresses we can reuse for future allocations. + reuse: ReusePool, /// Whether an allocation has been exposed or not. This cannot be put /// into `AllocExtra` for the same reason as `base_addr`. exposed: FxHashSet, @@ -53,6 +58,7 @@ impl VisitProvenance for GlobalStateInner { let GlobalStateInner { int_to_ptr_map: _, base_addr: _, + reuse: _, exposed: _, next_base_addr: _, provenance_mode: _, @@ -71,6 +77,7 @@ impl GlobalStateInner { GlobalStateInner { int_to_ptr_map: Vec::default(), base_addr: FxHashMap::default(), + reuse: ReusePool::new(), exposed: FxHashSet::default(), next_base_addr: stack_addr, provenance_mode: config.provenance_mode, @@ -142,6 +149,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { Ok(match global_state.base_addr.entry(alloc_id) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { + let mut rng = ecx.machine.rng.borrow_mut(); let (size, align, kind) = ecx.get_alloc_info(alloc_id); // This is either called immediately after allocation (and then cached), or when // adjusting `tcx` pointers (which never get freed). So assert that we are looking @@ -150,44 +158,63 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { // information was removed. assert!(!matches!(kind, AllocKind::Dead)); - // This allocation does not have a base address yet, pick one. - // Leave some space to the previous allocation, to give it some chance to be less aligned. - let slack = { - let mut rng = ecx.machine.rng.borrow_mut(); - // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed. - rng.gen_range(0..16) + // This allocation does not have a base address yet, pick or reuse one. + let base_addr = if let Some(reuse_addr) = + global_state.reuse.take_addr(&mut *rng, size, align) + { + reuse_addr + } else { + // We have to pick a fresh address. + // Leave some space to the previous allocation, to give it some chance to be less aligned. + // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed. + let slack = rng.gen_range(0..16); + // From next_base_addr + slack, round up to adjust for alignment. + let base_addr = global_state + .next_base_addr + .checked_add(slack) + .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; + let base_addr = align_addr(base_addr, align.bytes()); + + // Remember next base address. If this allocation is zero-sized, leave a gap + // of at least 1 to avoid two allocations having the same base address. + // (The logic in `alloc_id_from_addr` assumes unique addresses, and different + // function/vtable pointers need to be distinguishable!) + global_state.next_base_addr = base_addr + .checked_add(max(size.bytes(), 1)) + .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; + // Even if `Size` didn't overflow, we might still have filled up the address space. + if global_state.next_base_addr > ecx.target_usize_max() { + throw_exhaust!(AddressSpaceFull); + } + + base_addr }; - // From next_base_addr + slack, round up to adjust for alignment. 
- let base_addr = global_state - .next_base_addr - .checked_add(slack) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - let base_addr = align_addr(base_addr, align.bytes()); - entry.insert(base_addr); trace!( - "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})", + "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})", base_addr, alloc_id, size.bytes(), align.bytes(), - slack, ); - // Remember next base address. If this allocation is zero-sized, leave a gap - // of at least 1 to avoid two allocations having the same base address. - // (The logic in `alloc_id_from_addr` assumes unique addresses, and different - // function/vtable pointers need to be distinguishable!) - global_state.next_base_addr = base_addr - .checked_add(max(size.bytes(), 1)) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - // Even if `Size` didn't overflow, we might still have filled up the address space. - if global_state.next_base_addr > ecx.target_usize_max() { - throw_exhaust!(AddressSpaceFull); - } - // Also maintain the opposite mapping in `int_to_ptr_map`. - // Given that `next_base_addr` increases in each allocation, pushing the - // corresponding tuple keeps `int_to_ptr_map` sorted - global_state.int_to_ptr_map.push((base_addr, alloc_id)); + // Store address in cache. + entry.insert(base_addr); + + // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted. + // We have a fast-path for the common case that this address is bigger than all previous ones. + let pos = if global_state + .int_to_ptr_map + .last() + .is_some_and(|(last_addr, _)| *last_addr < base_addr) + { + global_state.int_to_ptr_map.len() + } else { + global_state + .int_to_ptr_map + .binary_search_by_key(&base_addr, |(addr, _)| *addr) + .unwrap_err() + }; + global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id)); base_addr } @@ -302,7 +329,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } impl GlobalStateInner { - pub fn free_alloc_id(&mut self, dead_id: AllocId) { + pub fn free_alloc_id( + &mut self, + rng: &mut impl Rng, + dead_id: AllocId, + size: Size, + align: Align, + ) { // We can *not* remove this from `base_addr`, since the interpreter design requires that we // be able to retrieve an AllocId + offset for any memory access *before* we check if the // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory @@ -322,6 +355,8 @@ impl GlobalStateInner { // We can also remove it from `exposed`, since this allocation can anyway not be returned by // `alloc_id_from_addr` any more. self.exposed.remove(&dead_id); + // Also remember this address for future reuse. + self.reuse.add_addr(rng, addr, size, align) } } diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs new file mode 100644 index 00000000000..8374d0ec605 --- /dev/null +++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs @@ -0,0 +1,87 @@ +//! Manages a pool of addresses that can be reused. + +use rand::Rng; + +use rustc_target::abi::{Align, Size}; + +const MAX_POOL_SIZE: usize = 64; + +// Just use fair coins, until we have evidence that other numbers are better. +const ADDR_REMEMBER_CHANCE: f64 = 0.5; +const ADDR_TAKE_CHANCE: f64 = 0.5; + +/// The pool strikes a balance between exploring more possible executions and making it more likely +/// to find bugs. 
The hypothesis is that bugs are more likely to occur when reuse happens for
+/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
+/// structure. Therefore we only reuse allocations when size and alignment match exactly.
+#[derive(Debug)]
+pub struct ReusePool {
+    /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
+    /// allocations as address-size pairs; the list must be sorted by size.
+    ///
+    /// Each of these lists has at most MAX_POOL_SIZE elements, and since alignment is limited to
+    /// fewer than 64 different possible values, that bounds the overall size of the pool.
+    pool: Vec<Vec<(u64, Size)>>,
+}
+
+impl ReusePool {
+    pub fn new() -> Self {
+        ReusePool { pool: vec![] }
+    }
+
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+        let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
+        if self.pool.len() <= pool_idx {
+            self.pool.resize(pool_idx + 1, Vec::new());
+        }
+        &mut self.pool[pool_idx]
+    }
+
+    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+        // Let's see if we even want to remember this address.
+        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+            return;
+        }
+        // Determine the pool to add this to, and where in the pool to put it.
+        let subpool = self.subpool(align);
+        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        // Make sure the pool does not grow too big.
+        if subpool.len() >= MAX_POOL_SIZE {
+            // Pool full. Replace existing element, or last one if this would be even bigger.
+            let clamped_pos = pos.min(subpool.len() - 1);
+            subpool[clamped_pos] = (addr, size);
+            return;
+        }
+        // Add address to pool, at the right position.
+        subpool.insert(pos, (addr, size));
+    }
+
+    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
+        // Determine whether we'll even attempt a reuse.
+        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+            return None;
+        }
+        // Determine the pool to take this from.
+        let subpool = self.subpool(align);
+        // Let's see if we can find something of the right size. We want to find the full range of
+        // such items, beginning with the first, so we can't use `binary_search_by_key`.
+        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let mut end = begin;
+        while let Some((_addr, other_size)) = subpool.get(end) {
+            if *other_size != size {
+                break;
+            }
+            end += 1;
+        }
+        if end == begin {
+            // Could not find any item of the right size.
+            return None;
+        }
+        // Pick a random element with the desired size.
+        let idx = rng.gen_range(begin..end);
+        // Remove it from the pool and return.
+        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+        Some(chosen_addr)
+    }
+}
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 9e4b5fe8ad7..371dc6edda3 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -3,6 +3,8 @@ use std::iter;
 use std::num::NonZero;
 use std::time::Duration;
 
+use rand::RngCore;
+
 use rustc_apfloat::ieee::{Double, Single};
 use rustc_apfloat::Float;
 use rustc_hir::def::{DefKind, Namespace};
@@ -18,8 +20,6 @@ use rustc_span::{def_id::CrateNum, sym, Span, Symbol};
 use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
 use rustc_target::spec::abi::Abi;
 
-use rand::RngCore;
-
 use crate::*;
 
 /// Indicates which kind of access is being performed.
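(Aside, not part of the patch: with both chances set to 0.5, a freed address is remembered half the time and an eligible allocation attempts reuse half the time, so a given free/alloc pair can line up at most a quarter of the time. The `take_addr` scan itself is easy to sanity-check in isolation; in the following standalone sketch, plain `u64` stands in for `Size` and the addresses are made up.)

fn main() {
    // A size-sorted subpool of (address, size) pairs, as kept by `ReusePool`.
    let subpool: Vec<(u64, u64)> = vec![(0x100, 8), (0x200, 16), (0x300, 16), (0x400, 32)];
    let size = 16;
    // First index whose size is not less than the request...
    let begin = subpool.partition_point(|&(_, s)| s < size);
    // ...then extend over the entries of exactly that size.
    let end = begin + subpool[begin..].iter().take_while(|&&(_, s)| s == size).count();
    assert_eq!((begin, end), (1, 3)); // exactly the two 16-byte entries
}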
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 3a54629222d..53f30ec2ba4 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1289,7 +1289,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         alloc_extra: &mut AllocExtra<'tcx>,
         (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
         size: Size,
-        _align: Align,
+        align: Align,
     ) -> InterpResult<'tcx> {
         if machine.tracked_alloc_ids.contains(&alloc_id) {
             machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
@@ -1304,7 +1304,12 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.alloc_addresses.get_mut().free_alloc_id(alloc_id);
+        machine.alloc_addresses.get_mut().free_alloc_id(
+            machine.rng.get_mut(),
+            alloc_id,
+            size,
+            align,
+        );
         Ok(())
     }
 
diff --git a/src/tools/miri/tests/pass/address-reuse.rs b/src/tools/miri/tests/pass/address-reuse.rs
new file mode 100644
index 00000000000..9b5c8c38b8f
--- /dev/null
+++ b/src/tools/miri/tests/pass/address-reuse.rs
@@ -0,0 +1,16 @@
+//! Check that we do sometimes reuse addresses.
+use std::collections::HashSet;
+
+fn main() {
+    let count = 100;
+    let mut addrs = HashSet::<usize>::new();
+    for _ in 0..count {
+        // We make a `Box` with a layout that's hopefully not used by tons of things inside the
+        // allocator itself, so that we are more likely to get reuse. (With `i32` or `usize`, on
+        // Windows the reuse chances are very low.)
+        let b = Box::new([42usize; 4]);
+        addrs.insert(&*b as *const [usize; 4] as usize);
+    }
+    // dbg!(addrs.len());
+    assert!(addrs.len() > 1 && addrs.len() < count);
+}
diff --git a/src/tools/miri/tests/pass/intptrcast.rs b/src/tools/miri/tests/pass/intptrcast.rs
index 42b6f433420..370b09f512c 100644
--- a/src/tools/miri/tests/pass/intptrcast.rs
+++ b/src/tools/miri/tests/pass/intptrcast.rs
@@ -67,8 +67,8 @@ fn ptr_eq_dangling() {
     drop(b);
     let b = Box::new(0);
     let y = &*b as *const i32; // different allocation
-    // They *could* be equal if memory was reused, but probably are not.
-    assert!(x != y);
+    // They *could* be equal if memory is reused...
+    assert!(x != y || x == y);
 }
 
 fn ptr_eq_out_of_bounds() {
-- cgit 1.4.1-3-g733a5
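(Aside, not part of the patch: the shape of bug this reuse is designed to surface is the classic stale-pointer/ABA pattern sketched below. Reuse only happens when size and alignment match exactly, so the second `Box<u64>` may land at the address of the dangling `first`; under `cargo miri run` the branch may or may not be taken depending on the seed, which is exactly the nondeterminism the `assert!(x != y || x == y)` rewrite above accommodates.)

fn main() {
    let first = Box::into_raw(Box::new(42u64));
    unsafe { drop(Box::from_raw(first)) }; // `first` is now dangling
    let second = Box::into_raw(Box::new(7u64)); // same size and alignment
    // Comparing addresses is fine, but code that treats `first == second` as
    // proof that `first` still points to the old allocation has an ABA bug;
    // with address reuse, Miri can now actually take this branch.
    if first == second {
        println!("address was reused");
    }
    unsafe { drop(Box::from_raw(second)) };
}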