Diffstat (limited to 'compiler/rustc_mir/src/interpret/memory.rs')
| -rw-r--r-- | compiler/rustc_mir/src/interpret/memory.rs | 1028 |
1 file changed, 1028 insertions, 0 deletions
diff --git a/compiler/rustc_mir/src/interpret/memory.rs b/compiler/rustc_mir/src/interpret/memory.rs
new file mode 100644
index 00000000000..d4be2ce0568
--- /dev/null
+++ b/compiler/rustc_mir/src/interpret/memory.rs
@@ -0,0 +1,1028 @@
+//! The memory subsystem.
+//!
+//! Generally, we use `Pointer` to denote memory addresses. However, some operations
+//! have a "size"-like parameter, and they take `Scalar` for the address because
+//! if the size is 0, then the pointer can also be a (properly aligned, non-NULL)
+//! integer. It is crucial that these operations call `check_align` *before*
+//! short-circuiting the empty case!
+
+use std::borrow::Cow;
+use std::collections::VecDeque;
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+use std::ptr;
+
+use rustc_ast::Mutability;
+use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_middle::ty::{Instance, ParamEnv, TyCtxt};
+use rustc_target::abi::{Align, HasDataLayout, Size, TargetDataLayout};
+
+use super::{
+    AllocId, AllocMap, Allocation, AllocationExtra, CheckInAllocMsg, GlobalAlloc, InterpResult,
+    Machine, MayLeak, Pointer, PointerArithmetic, Scalar,
+};
+use crate::util::pretty;
+
+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum MemoryKind<T> {
+    /// Stack memory. Error if deallocated except during a stack pop.
+    Stack,
+    /// Memory backing vtables. Error if ever deallocated.
+    Vtable,
+    /// Memory allocated by `caller_location` intrinsic. Error if ever deallocated.
+    CallerLocation,
+    /// Additional memory kinds a machine wishes to distinguish from the builtin ones.
+    Machine(T),
+}
+
+impl<T: MayLeak> MayLeak for MemoryKind<T> {
+    #[inline]
+    fn may_leak(self) -> bool {
+        match self {
+            MemoryKind::Stack => false,
+            MemoryKind::Vtable => true,
+            MemoryKind::CallerLocation => true,
+            MemoryKind::Machine(k) => k.may_leak(),
+        }
+    }
+}
+
+impl<T: fmt::Display> fmt::Display for MemoryKind<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MemoryKind::Stack => write!(f, "stack variable"),
+            MemoryKind::Vtable => write!(f, "vtable"),
+            MemoryKind::CallerLocation => write!(f, "caller location"),
+            MemoryKind::Machine(m) => write!(f, "{}", m),
+        }
+    }
+}
+
+/// Used by `get_size_and_align` to indicate whether the allocation needs to be live.
+#[derive(Debug, Copy, Clone)]
+pub enum AllocCheck {
+    /// Allocation must be live and not a function pointer.
+    Dereferenceable,
+    /// Allocation needs to be live, but may be a function pointer.
+    Live,
+    /// Allocation may be dead.
+    MaybeDead,
+}
+
+/// The value of a function pointer.
+#[derive(Debug, Copy, Clone)]
+pub enum FnVal<'tcx, Other> {
+    Instance(Instance<'tcx>),
+    Other(Other),
+}
+
+impl<'tcx, Other> FnVal<'tcx, Other> {
+    pub fn as_instance(self) -> InterpResult<'tcx, Instance<'tcx>> {
+        match self {
+            FnVal::Instance(instance) => Ok(instance),
+            FnVal::Other(_) => {
+                throw_unsup_format!("'foreign' function pointers are not supported in this context")
+            }
+        }
+    }
+}
+
+// `Memory` has to depend on the `Machine` because some of its operations
+// (e.g., `get`) call a `Machine` hook.
+pub struct Memory<'mir, 'tcx, M: Machine<'mir, 'tcx>> {
+    /// Allocations local to this instance of the miri engine. The kind
+    /// helps ensure that the same mechanism is used for allocation and
+    /// deallocation. When an allocation is not found here, it is a
+    /// global and looked up in the `tcx` for read access. Some machines may
+    /// have to mutate this map even on a read-only access to a global (because
+    /// they do pointer provenance tracking and the allocations in `tcx` have
+    /// the wrong type), so we let the machine override this type.
+    /// Either way, if the machine allows writing to a global, doing so will
+    /// create a copy of the global allocation here.
+    // FIXME: this should not be public, but interning currently needs access to it
+    pub(super) alloc_map: M::MemoryMap,
+
+    /// Map for "extra" function pointers.
+    extra_fn_ptr_map: FxHashMap<AllocId, M::ExtraFnVal>,
+
+    /// To be able to compare pointers with NULL, and to check alignment for accesses
+    /// to ZSTs (where pointers may dangle), we keep track of the size even for allocations
+    /// that do not exist any more.
+    // FIXME: this should not be public, but interning currently needs access to it
+    pub(super) dead_alloc_map: FxHashMap<AllocId, (Size, Align)>,
+
+    /// Extra data added by the machine.
+    pub extra: M::MemoryExtra,
+
+    /// Lets us implement `HasDataLayout`, which is awfully convenient.
+    pub tcx: TyCtxt<'tcx>,
+}
+
+impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> HasDataLayout for Memory<'mir, 'tcx, M> {
+    #[inline]
+    fn data_layout(&self) -> &TargetDataLayout {
+        &self.tcx.data_layout
+    }
+}
+
+impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
+    pub fn new(tcx: TyCtxt<'tcx>, extra: M::MemoryExtra) -> Self {
+        Memory {
+            alloc_map: M::MemoryMap::default(),
+            extra_fn_ptr_map: FxHashMap::default(),
+            dead_alloc_map: FxHashMap::default(),
+            extra,
+            tcx,
+        }
+    }
+
+    /// Call this to turn untagged "global" pointers (obtained via `tcx`) into
+    /// the machine pointer to the allocation. Must never be used
+    /// for any other pointers, nor for TLS statics.
+    ///
+    /// Using the resulting pointer represents a *direct* access to that memory
+    /// (e.g. by directly using a `static`),
+    /// as opposed to access through a pointer that was created by the program.
+    ///
+    /// This function can fail only if `ptr` points to an `extern static`.
+    #[inline]
+    pub fn global_base_pointer(
+        &self,
+        mut ptr: Pointer,
+    ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
+        // We need to handle `extern static`.
+        let ptr = match self.tcx.get_global_alloc(ptr.alloc_id) {
+            Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
+                bug!("global memory cannot point to thread-local static")
+            }
+            Some(GlobalAlloc::Static(def_id)) if self.tcx.is_foreign_item(def_id) => {
+                ptr.alloc_id = M::extern_static_alloc_id(self, def_id)?;
+                ptr
+            }
+            _ => {
+                // No need to change the `AllocId`.
+                ptr
+            }
+        };
+        // And we need to get the tag.
+        let tag = M::tag_global_base_pointer(&self.extra, ptr.alloc_id);
+        Ok(ptr.with_tag(tag))
+    }
+
+    pub fn create_fn_alloc(
+        &mut self,
+        fn_val: FnVal<'tcx, M::ExtraFnVal>,
+    ) -> Pointer<M::PointerTag> {
+        let id = match fn_val {
+            FnVal::Instance(instance) => self.tcx.create_fn_alloc(instance),
+            FnVal::Other(extra) => {
+                // FIXME(RalfJung): Should we have a cache here?
+                let id = self.tcx.reserve_alloc_id();
+                let old = self.extra_fn_ptr_map.insert(id, extra);
+                assert!(old.is_none());
+                id
+            }
+        };
+        // Functions are global allocations, so make sure we get the right base pointer.
+        // We know this is not an `extern static` so this cannot fail.
+        self.global_base_pointer(Pointer::from(id)).unwrap()
+    }
+
+    pub fn allocate(
+        &mut self,
+        size: Size,
+        align: Align,
+        kind: MemoryKind<M::MemoryKind>,
+    ) -> Pointer<M::PointerTag> {
+        let alloc = Allocation::uninit(size, align);
+        self.allocate_with(alloc, kind)
+    }
+
+    pub fn allocate_bytes(
+        &mut self,
+        bytes: &[u8],
+        kind: MemoryKind<M::MemoryKind>,
+    ) -> Pointer<M::PointerTag> {
+        let alloc = Allocation::from_byte_aligned_bytes(bytes);
+        self.allocate_with(alloc, kind)
+    }
+
+    pub fn allocate_with(
+        &mut self,
+        alloc: Allocation,
+        kind: MemoryKind<M::MemoryKind>,
+    ) -> Pointer<M::PointerTag> {
+        let id = self.tcx.reserve_alloc_id();
+        debug_assert_ne!(
+            Some(kind),
+            M::GLOBAL_KIND.map(MemoryKind::Machine),
+            "dynamically allocating global memory"
+        );
+        // This is a new allocation, not a new global one, so no `global_base_ptr`.
+        let (alloc, tag) = M::init_allocation_extra(&self.extra, id, Cow::Owned(alloc), Some(kind));
+        self.alloc_map.insert(id, (kind, alloc.into_owned()));
+        Pointer::from(id).with_tag(tag)
+    }
+
+    pub fn reallocate(
+        &mut self,
+        ptr: Pointer<M::PointerTag>,
+        old_size_and_align: Option<(Size, Align)>,
+        new_size: Size,
+        new_align: Align,
+        kind: MemoryKind<M::MemoryKind>,
+    ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
+        if ptr.offset.bytes() != 0 {
+            throw_ub_format!(
+                "reallocating {:?} which does not point to the beginning of an object",
+                ptr
+            );
+        }
+
+        // For simplicity's sake, we implement reallocate as "alloc, copy, dealloc".
+        // This happens so rarely, the perf advantage is outweighed by the maintenance cost.
+        let new_ptr = self.allocate(new_size, new_align, kind);
+        let old_size = match old_size_and_align {
+            Some((size, _align)) => size,
+            None => self.get_raw(ptr.alloc_id)?.size,
+        };
+        self.copy(ptr, new_ptr, old_size.min(new_size), /*nonoverlapping*/ true)?;
+        self.deallocate(ptr, old_size_and_align, kind)?;
+
+        Ok(new_ptr)
+    }
+
+    /// Deallocate a local, or do nothing if that local has been made into a global.
+    pub fn deallocate_local(&mut self, ptr: Pointer<M::PointerTag>) -> InterpResult<'tcx> {
+        // The allocation might be already removed by global interning.
+        // This can only really happen in the CTFE instance, not in miri.
+        if self.alloc_map.contains_key(&ptr.alloc_id) {
+            self.deallocate(ptr, None, MemoryKind::Stack)
+        } else {
+            Ok(())
+        }
+    }
+
+    pub fn deallocate(
+        &mut self,
+        ptr: Pointer<M::PointerTag>,
+        old_size_and_align: Option<(Size, Align)>,
+        kind: MemoryKind<M::MemoryKind>,
+    ) -> InterpResult<'tcx> {
+        trace!("deallocating: {}", ptr.alloc_id);
+
+        if ptr.offset.bytes() != 0 {
+            throw_ub_format!(
+                "deallocating {:?} which does not point to the beginning of an object",
+                ptr
+            );
+        }
+
+        M::before_deallocation(&mut self.extra, ptr.alloc_id)?;
+
+        let (alloc_kind, mut alloc) = match self.alloc_map.remove(&ptr.alloc_id) {
+            Some(alloc) => alloc,
+            None => {
+                // Deallocating global memory -- always an error
+                return Err(match self.tcx.get_global_alloc(ptr.alloc_id) {
+                    Some(GlobalAlloc::Function(..)) => err_ub_format!("deallocating a function"),
+                    Some(GlobalAlloc::Static(..) | GlobalAlloc::Memory(..)) => {
+                        err_ub_format!("deallocating static memory")
+                    }
+                    None => err_ub!(PointerUseAfterFree(ptr.alloc_id)),
+                }
+                .into());
+            }
+        };
+
+        if alloc_kind != kind {
+            throw_ub_format!(
+                "deallocating {} memory using {} deallocation operation",
+                alloc_kind,
+                kind
+            );
+        }
+        if let Some((size, align)) = old_size_and_align {
+            if size != alloc.size || align != alloc.align {
+                throw_ub_format!(
+                    "incorrect layout on deallocation: allocation has size {} and alignment {}, but gave size {} and alignment {}",
+                    alloc.size.bytes(),
+                    alloc.align.bytes(),
+                    size.bytes(),
+                    align.bytes(),
+                )
+            }
+        }
+
+        // Let the machine take some extra action
+        let size = alloc.size;
+        AllocationExtra::memory_deallocated(&mut alloc, ptr, size)?;
+
+        // Don't forget to remember size and align of this now-dead allocation
+        let old = self.dead_alloc_map.insert(ptr.alloc_id, (alloc.size, alloc.align));
+        if old.is_some() {
+            bug!("Nothing can be deallocated twice");
+        }
+
+        Ok(())
+    }
+
+    /// Check if the given scalar is allowed to do a memory access of given `size`
+    /// and `align`. On success, returns `None` for zero-sized accesses (where
+    /// nothing else is left to do) and a `Pointer` to use for the actual access otherwise.
+    /// Crucially, if the input is a `Pointer`, we will test it for liveness
+    /// *even if* the size is 0.
+    ///
+    /// Everyone accessing memory based on a `Scalar` should use this method to get the
+    /// `Pointer` they need. And even if you already have a `Pointer`, call this method
+    /// to make sure it is sufficiently aligned and not dangling. Not doing that may
+    /// cause ICEs.
+    ///
+    /// Most of the time you should use `check_mplace_access`, but when you just have a pointer,
+    /// this method is still appropriate.
+    #[inline(always)]
+    pub fn check_ptr_access(
+        &self,
+        sptr: Scalar<M::PointerTag>,
+        size: Size,
+        align: Align,
+    ) -> InterpResult<'tcx, Option<Pointer<M::PointerTag>>> {
+        let align = M::enforce_alignment(&self.extra).then_some(align);
+        self.check_ptr_access_align(sptr, size, align, CheckInAllocMsg::MemoryAccessTest)
+    }
+
+    /// Like `check_ptr_access`, but *definitely* checks alignment when `align`
+    /// is `Some` (overriding `M::enforce_alignment`). Also lets the caller control
+    /// the error message for the out-of-bounds case.
+    pub fn check_ptr_access_align(
+        &self,
+        sptr: Scalar<M::PointerTag>,
+        size: Size,
+        align: Option<Align>,
+        msg: CheckInAllocMsg,
+    ) -> InterpResult<'tcx, Option<Pointer<M::PointerTag>>> {
+        fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
+            if offset % align.bytes() == 0 {
+                Ok(())
+            } else {
+                // The biggest power of two through which `offset` is divisible.
+                let offset_pow2 = 1 << offset.trailing_zeros();
+                throw_ub!(AlignmentCheckFailed {
+                    has: Align::from_bytes(offset_pow2).unwrap(),
+                    required: align,
+                })
+            }
+        }
+
+        // Normalize to a `Pointer` if we definitely need one.
+        let normalized = if size.bytes() == 0 {
+            // Can be an integer, just take what we got. We do NOT `force_bits` here;
+            // if this is already a `Pointer` we want to do the bounds checks!
+            sptr
+        } else {
+            // A "real" access, we must get a pointer to be able to check the bounds.
+            Scalar::from(self.force_ptr(sptr)?)
+        };
+        Ok(match normalized.to_bits_or_ptr(self.pointer_size(), self) {
+            Ok(bits) => {
+                let bits = u64::try_from(bits).unwrap(); // it's ptr-sized
+                assert!(size.bytes() == 0);
+                // Must be non-NULL.
+                if bits == 0 {
+                    throw_ub!(DanglingIntPointer(0, msg))
+                }
+                // Must be aligned.
+                if let Some(align) = align {
+                    check_offset_align(bits, align)?;
+                }
+                None
+            }
+            Err(ptr) => {
+                let (allocation_size, alloc_align) =
+                    self.get_size_and_align(ptr.alloc_id, AllocCheck::Dereferenceable)?;
+                // Test bounds. This also ensures non-NULL.
+                // It is sufficient to check this for the end pointer. The addition
+                // checks for overflow.
+                let end_ptr = ptr.offset(size, self)?;
+                if end_ptr.offset > allocation_size {
+                    // equal is okay!
+                    throw_ub!(PointerOutOfBounds { ptr: end_ptr.erase_tag(), msg, allocation_size })
+                }
+                // Test align. Check this last; if both bounds and alignment are violated
+                // we want the error to be about the bounds.
+                if let Some(align) = align {
+                    if M::force_int_for_alignment_check(&self.extra) {
+                        let bits = self
+                            .force_bits(ptr.into(), self.pointer_size())
+                            .expect("ptr-to-int cast for align check should never fail");
+                        check_offset_align(bits.try_into().unwrap(), align)?;
+                    } else {
+                        // Check allocation alignment and offset alignment.
+                        if alloc_align.bytes() < align.bytes() {
+                            throw_ub!(AlignmentCheckFailed { has: alloc_align, required: align });
+                        }
+                        check_offset_align(ptr.offset.bytes(), align)?;
+                    }
+                }
+
+                // We can still be zero-sized in this branch, in which case we have to
+                // return `None`.
+                if size.bytes() == 0 { None } else { Some(ptr) }
+            }
+        })
+    }
+
+    /// Test if the pointer might be NULL.
+    pub fn ptr_may_be_null(&self, ptr: Pointer<M::PointerTag>) -> bool {
+        let (size, _align) = self
+            .get_size_and_align(ptr.alloc_id, AllocCheck::MaybeDead)
+            .expect("alloc info with MaybeDead cannot fail");
+        // If the pointer is out-of-bounds, it may be null.
+        // Note that one-past-the-end (offset == size) is still inbounds, and never null.
+        ptr.offset > size
+    }
+}
+
+/// Allocation accessors
+impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
+    /// Helper function to obtain a global (tcx) allocation.
+    /// This attempts to return a reference to an existing allocation if
+    /// one can be found in `tcx`. That, however, is only possible if `tcx` and
+    /// this machine use the same pointer tag, so it is indirected through
+    /// `M::tag_allocation`.
+    fn get_global_alloc(
+        memory_extra: &M::MemoryExtra,
+        tcx: TyCtxt<'tcx>,
+        id: AllocId,
+        is_write: bool,
+    ) -> InterpResult<'tcx, Cow<'tcx, Allocation<M::PointerTag, M::AllocExtra>>> {
+        let (alloc, def_id) = match tcx.get_global_alloc(id) {
+            Some(GlobalAlloc::Memory(mem)) => {
+                // Memory of a constant or promoted or anonymous memory referenced by a static.
+                (mem, None)
+            }
+            Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)),
+            None => throw_ub!(PointerUseAfterFree(id)),
+            Some(GlobalAlloc::Static(def_id)) => {
+                assert!(tcx.is_static(def_id));
+                assert!(!tcx.is_thread_local_static(def_id));
+                // Notice that every static has two `AllocId`s that will resolve to the same
+                // thing here: one maps to `GlobalAlloc::Static`, this is the "lazy" ID,
+                // and the other one maps to `GlobalAlloc::Memory`, this is returned by
+                // `const_eval_raw` and it is the "resolved" ID.
+                // The resolved ID is never used by the interpreted program, it is hidden.
+                // This is relied upon for soundness of const-patterns; a pointer to the resolved
+                // ID would "sidestep" the checks that make sure consts do not point to statics!
+                // The `GlobalAlloc::Memory` branch here is still reachable though; when a static
+                // contains a reference to memory that was created during its evaluation (i.e., not
+                // to another static), those inner references only exist in "resolved" form.
+                if tcx.is_foreign_item(def_id) {
+                    throw_unsup!(ReadExternStatic(def_id));
+                }
+
+                (tcx.eval_static_initializer(def_id)?, Some(def_id))
+            }
+        };
+        M::before_access_global(memory_extra, id, alloc, def_id, is_write)?;
+        let alloc = Cow::Borrowed(alloc);
+        // We got tcx memory. Let the machine initialize its "extra" stuff.
+        let (alloc, tag) = M::init_allocation_extra(
+            memory_extra,
+            id, // always use the ID we got as input, not the "hidden" one.
+            alloc,
+            M::GLOBAL_KIND.map(MemoryKind::Machine),
+        );
+        // Sanity check that this is the same pointer we would have gotten via `global_base_pointer`.
+        debug_assert_eq!(tag, M::tag_global_base_pointer(memory_extra, id));
+        Ok(alloc)
+    }
+
+    /// Gives raw access to the `Allocation`, without bounds or alignment checks.
+    /// Use the higher-level, `PlaceTy`- and `OpTy`-based APIs in `InterpCx` instead!
+    pub fn get_raw(
+        &self,
+        id: AllocId,
+    ) -> InterpResult<'tcx, &Allocation<M::PointerTag, M::AllocExtra>> {
+        // The error type of the inner closure here is somewhat funny. We have two
+        // ways of "erroring": An actual error, or because we got a reference from
+        // `get_global_alloc` that we can actually use directly without inserting anything anywhere.
+        // So the error type is `InterpResult<'tcx, &Allocation<M::PointerTag>>`.
+        let a = self.alloc_map.get_or(id, || {
+            let alloc = Self::get_global_alloc(&self.extra, self.tcx, id, /*is_write*/ false)
+                .map_err(Err)?;
+            match alloc {
+                Cow::Borrowed(alloc) => {
+                    // We got a ref, cheaply return that as an "error" so that the
+                    // map does not get mutated.
+                    Err(Ok(alloc))
+                }
+                Cow::Owned(alloc) => {
+                    // Need to put it into the map and return a ref to that
+                    let kind = M::GLOBAL_KIND.expect(
+                        "I got a global allocation that I have to copy but the machine does \
+                         not expect that to happen",
+                    );
+                    Ok((MemoryKind::Machine(kind), alloc))
+                }
+            }
+        });
+        // Now unpack that funny error type
+        match a {
+            Ok(a) => Ok(&a.1),
+            Err(a) => a,
+        }
+    }
+
+    /// Gives raw mutable access to the `Allocation`, without bounds or alignment checks.
+    /// Use the higher-level, `PlaceTy`- and `OpTy`-based APIs in `InterpCx` instead!
+    pub fn get_raw_mut(
+        &mut self,
+        id: AllocId,
+    ) -> InterpResult<'tcx, &mut Allocation<M::PointerTag, M::AllocExtra>> {
+        let tcx = self.tcx;
+        let memory_extra = &self.extra;
+        let a = self.alloc_map.get_mut_or(id, || {
+            // Need to make a copy, even if `get_global_alloc` is able
+            // to give us a cheap reference.
+            let alloc = Self::get_global_alloc(memory_extra, tcx, id, /*is_write*/ true)?;
+            if alloc.mutability == Mutability::Not {
+                throw_ub!(WriteToReadOnly(id))
+            }
+            let kind = M::GLOBAL_KIND.expect(
+                "I got a global allocation that I have to copy but the machine does \
+                 not expect that to happen",
+            );
+            Ok((MemoryKind::Machine(kind), alloc.into_owned()))
+        });
+        // Unpack the error type manually because type inference doesn't
+        // work otherwise (and we cannot help it because of `impl Trait`)
+        match a {
+            Err(e) => Err(e),
+            Ok(a) => {
+                let a = &mut a.1;
+                if a.mutability == Mutability::Not {
+                    throw_ub!(WriteToReadOnly(id))
+                }
+                Ok(a)
+            }
+        }
+    }
+
+    /// Obtain the size and alignment of an allocation, even if that allocation has
+    /// been deallocated.
+    ///
+    /// If `liveness` is `AllocCheck::MaybeDead`, this function always returns `Ok`.
+    pub fn get_size_and_align(
+        &self,
+        id: AllocId,
+        liveness: AllocCheck,
+    ) -> InterpResult<'static, (Size, Align)> {
+        // # Regular allocations
+        // Don't use `self.get_raw` here as that will
+        // a) cause cycles in case `id` refers to a static
+        // b) duplicate a global's allocation in miri
+        if let Some((_, alloc)) = self.alloc_map.get(id) {
+            return Ok((alloc.size, alloc.align));
+        }
+
+        // # Function pointers
+        // (both global from `alloc_map` and local from `extra_fn_ptr_map`)
+        if self.get_fn_alloc(id).is_some() {
+            return if let AllocCheck::Dereferenceable = liveness {
+                // The caller requested no function pointers.
+                throw_ub!(DerefFunctionPointer(id))
+            } else {
+                Ok((Size::ZERO, Align::from_bytes(1).unwrap()))
+            };
+        }
+
+        // # Statics
+        // Can't do this in the match argument, we may get cycle errors since the lock would
+        // be held throughout the match.
+        match self.tcx.get_global_alloc(id) {
+            Some(GlobalAlloc::Static(did)) => {
+                assert!(!self.tcx.is_thread_local_static(did));
+                // Use size and align of the type.
+                let ty = self.tcx.type_of(did);
+                let layout = self.tcx.layout_of(ParamEnv::empty().and(ty)).unwrap();
+                Ok((layout.size, layout.align.abi))
+            }
+            Some(GlobalAlloc::Memory(alloc)) => {
+                // Need to duplicate the logic here, because the global allocations have
+                // different associated types than the interpreter-local ones.
+                Ok((alloc.size, alloc.align))
+            }
+            Some(GlobalAlloc::Function(_)) => bug!("We already checked function pointers above"),
+            // The rest must be dead.
+            None => {
+                if let AllocCheck::MaybeDead = liveness {
+                    // Deallocated pointers are allowed, we should be able to find
+                    // them in the map.
+                    Ok(*self
+                        .dead_alloc_map
+                        .get(&id)
+                        .expect("deallocated pointers should all be recorded in `dead_alloc_map`"))
+                } else {
+                    throw_ub!(PointerUseAfterFree(id))
+                }
+            }
+        }
+    }
+
+    fn get_fn_alloc(&self, id: AllocId) -> Option<FnVal<'tcx, M::ExtraFnVal>> {
+        trace!("reading fn ptr: {}", id);
+        if let Some(extra) = self.extra_fn_ptr_map.get(&id) {
+            Some(FnVal::Other(*extra))
+        } else {
+            match self.tcx.get_global_alloc(id) {
+                Some(GlobalAlloc::Function(instance)) => Some(FnVal::Instance(instance)),
+                _ => None,
+            }
+        }
+    }
+
+    pub fn get_fn(
+        &self,
+        ptr: Scalar<M::PointerTag>,
+    ) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
+        let ptr = self.force_ptr(ptr)?; // We definitely need a pointer value.
+        if ptr.offset.bytes() != 0 {
+            throw_ub!(InvalidFunctionPointer(ptr.erase_tag()))
+        }
+        self.get_fn_alloc(ptr.alloc_id)
+            .ok_or_else(|| err_ub!(InvalidFunctionPointer(ptr.erase_tag())).into())
+    }
+
+    pub fn mark_immutable(&mut self, id: AllocId) -> InterpResult<'tcx> {
+        self.get_raw_mut(id)?.mutability = Mutability::Not;
+        Ok(())
+    }
+
+    /// Create a lazy debug printer that prints the given allocation and all allocations it points
+    /// to, recursively.
+    #[must_use]
+    pub fn dump_alloc<'a>(&'a self, id: AllocId) -> DumpAllocs<'a, 'mir, 'tcx, M> {
+        self.dump_allocs(vec![id])
+    }
+
+    /// Create a lazy debug printer for a list of allocations and all allocations they point to,
+    /// recursively.
+    #[must_use]
+    pub fn dump_allocs<'a>(&'a self, mut allocs: Vec<AllocId>) -> DumpAllocs<'a, 'mir, 'tcx, M> {
+        allocs.sort();
+        allocs.dedup();
+        DumpAllocs { mem: self, allocs }
+    }
+
+    /// Print leaked memory. Allocations reachable from `static_roots` or a `Global` allocation
+    /// are not considered leaked. Leaks whose kind `may_leak()` returns true are not reported.
+    pub fn leak_report(&self, static_roots: &[AllocId]) -> usize {
+        // Collect the set of allocations that are *reachable* from `Global` allocations.
+        let reachable = {
+            let mut reachable = FxHashSet::default();
+            let global_kind = M::GLOBAL_KIND.map(MemoryKind::Machine);
+            let mut todo: Vec<_> = self.alloc_map.filter_map_collect(move |&id, &(kind, _)| {
+                if Some(kind) == global_kind { Some(id) } else { None }
+            });
+            todo.extend(static_roots);
+            while let Some(id) = todo.pop() {
+                if reachable.insert(id) {
+                    // This is a new allocation, add its relocations to `todo`.
+                    if let Some((_, alloc)) = self.alloc_map.get(id) {
+                        todo.extend(alloc.relocations().values().map(|&(_, target_id)| target_id));
+                    }
+                }
+            }
+            reachable
+        };
+
+        // All allocations that are *not* `reachable` and *not* `may_leak` are considered leaking.
+        let leaks: Vec<_> = self.alloc_map.filter_map_collect(|&id, &(kind, _)| {
+            if kind.may_leak() || reachable.contains(&id) { None } else { Some(id) }
+        });
+        let n = leaks.len();
+        if n > 0 {
+            eprintln!("The following memory was leaked: {:?}", self.dump_allocs(leaks));
+        }
+        n
+    }
+
+    /// This is used by [priroda](https://github.com/oli-obk/priroda)
+    pub fn alloc_map(&self) -> &M::MemoryMap {
+        &self.alloc_map
+    }
+}
+
+#[doc(hidden)]
+/// There's no way to use this directly, it's just a helper struct for the `dump_alloc(s)` methods.
+pub struct DumpAllocs<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> {
+    mem: &'a Memory<'mir, 'tcx, M>,
+    allocs: Vec<AllocId>,
+}
+
+impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a, 'mir, 'tcx, M> {
+    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Cannot be a closure because it is generic in `Tag`, `Extra`.
+        fn write_allocation_track_relocs<'tcx, Tag: Copy + fmt::Debug, Extra>(
+            fmt: &mut std::fmt::Formatter<'_>,
+            tcx: TyCtxt<'tcx>,
+            allocs_to_print: &mut VecDeque<AllocId>,
+            alloc: &Allocation<Tag, Extra>,
+        ) -> std::fmt::Result {
+            for &(_, target_id) in alloc.relocations().values() {
+                allocs_to_print.push_back(target_id);
+            }
+            write!(fmt, "{}", pretty::display_allocation(tcx, alloc))
+        }
+
+        let mut allocs_to_print: VecDeque<_> = self.allocs.iter().copied().collect();
+        // `allocs_printed` contains all allocations that we have already printed.
+        let mut allocs_printed = FxHashSet::default();
+
+        while let Some(id) = allocs_to_print.pop_front() {
+            if !allocs_printed.insert(id) {
+                // Already printed, so skip this.
+                continue;
+            }
+
+            write!(fmt, "{}", id)?;
+            match self.mem.alloc_map.get(id) {
+                Some(&(kind, ref alloc)) => {
+                    // normal alloc
+                    write!(fmt, " ({}, ", kind)?;
+                    write_allocation_track_relocs(
+                        &mut *fmt,
+                        self.mem.tcx,
+                        &mut allocs_to_print,
+                        alloc,
+                    )?;
+                }
+                None => {
+                    // global alloc
+                    match self.mem.tcx.get_global_alloc(id) {
+                        Some(GlobalAlloc::Memory(alloc)) => {
+                            write!(fmt, " (unchanged global, ")?;
+                            write_allocation_track_relocs(
+                                &mut *fmt,
+                                self.mem.tcx,
+                                &mut allocs_to_print,
+                                alloc,
+                            )?;
+                        }
+                        Some(GlobalAlloc::Function(func)) => {
+                            write!(fmt, " (fn: {})", func)?;
+                        }
+                        Some(GlobalAlloc::Static(did)) => {
+                            write!(fmt, " (static: {})", self.mem.tcx.def_path_str(did))?;
+                        }
+                        None => {
+                            write!(fmt, " (deallocated)")?;
+                        }
+                    }
+                }
+            }
+            writeln!(fmt)?;
+        }
+        Ok(())
+    }
+}
+
+/// Reading and writing.
+impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
+    /// Reads the given number of bytes from memory. Returns them as a slice.
+    ///
+    /// Performs appropriate bounds checks.
+    pub fn read_bytes(&self, ptr: Scalar<M::PointerTag>, size: Size) -> InterpResult<'tcx, &[u8]> {
+        let ptr = match self.check_ptr_access(ptr, size, Align::from_bytes(1).unwrap())? {
+            Some(ptr) => ptr,
+            None => return Ok(&[]), // zero-sized access
+        };
+        self.get_raw(ptr.alloc_id)?.get_bytes(self, ptr, size)
+    }
+
+    /// Reads a 0-terminated sequence of bytes from memory. Returns them as a slice.
+    ///
+    /// Performs appropriate bounds checks.
+    pub fn read_c_str(&self, ptr: Scalar<M::PointerTag>) -> InterpResult<'tcx, &[u8]> {
+        let ptr = self.force_ptr(ptr)?; // We need to read at least 1 byte, so we *need* a ptr.
+        self.get_raw(ptr.alloc_id)?.read_c_str(self, ptr)
+    }
+
+    /// Reads a 0x0000-terminated sequence of `u16`s from memory. Returns it as a `Vec<u16>`.
+    /// The terminator 0x0000 is not included in the returned `Vec<u16>`.
+    ///
+    /// Performs appropriate bounds checks.
+    pub fn read_wide_str(&self, ptr: Scalar<M::PointerTag>) -> InterpResult<'tcx, Vec<u16>> {
+        let size_2bytes = Size::from_bytes(2);
+        let align_2bytes = Align::from_bytes(2).unwrap();
+        // We need to read at least 2 bytes, so we *need* a ptr.
+        let mut ptr = self.force_ptr(ptr)?;
+        let allocation = self.get_raw(ptr.alloc_id)?;
+        let mut u16_seq = Vec::new();
+
+        loop {
+            ptr = self
+                .check_ptr_access(ptr.into(), size_2bytes, align_2bytes)?
+                .expect("cannot be a ZST");
+            let single_u16 = allocation.read_scalar(self, ptr, size_2bytes)?.to_u16()?;
+            if single_u16 != 0x0000 {
+                u16_seq.push(single_u16);
+                ptr = ptr.offset(size_2bytes, self)?;
+            } else {
+                break;
+            }
+        }
+        Ok(u16_seq)
+    }
+
+    /// Writes the given stream of bytes into memory.
+    ///
+    /// Performs appropriate bounds checks.
+    pub fn write_bytes(
+        &mut self,
+        ptr: Scalar<M::PointerTag>,
+        src: impl IntoIterator<Item = u8>,
+    ) -> InterpResult<'tcx> {
+        let mut src = src.into_iter();
+        let size = Size::from_bytes(src.size_hint().0);
+        // `write_bytes` checks that this lower bound `size` matches the upper bound and reality.
+        let ptr = match self.check_ptr_access(ptr, size, Align::from_bytes(1).unwrap())? {
+            Some(ptr) => ptr,
+            None => {
+                // zero-sized access
+                src.next().expect_none("iterator said it was empty but returned an element");
+                return Ok(());
+            }
+        };
+        let tcx = self.tcx;
+        self.get_raw_mut(ptr.alloc_id)?.write_bytes(&tcx, ptr, src)
+    }
+
+    /// Writes the given stream of u16s into memory.
+    ///
+    /// Performs appropriate bounds checks.
+    pub fn write_u16s(
+        &mut self,
+        ptr: Scalar<M::PointerTag>,
+        src: impl IntoIterator<Item = u16>,
+    ) -> InterpResult<'tcx> {
+        let mut src = src.into_iter();
+        let (lower, upper) = src.size_hint();
+        let len = upper.expect("can only write bounded iterators");
+        assert_eq!(lower, len, "can only write iterators with a precise length");
+
+        let size = Size::from_bytes(lower);
+        let ptr = match self.check_ptr_access(ptr, size, Align::from_bytes(2).unwrap())? {
+            Some(ptr) => ptr,
+            None => {
+                // zero-sized access
+                src.next().expect_none("iterator said it was empty but returned an element");
+                return Ok(());
+            }
+        };
+        let tcx = self.tcx;
+        let allocation = self.get_raw_mut(ptr.alloc_id)?;
+
+        for idx in 0..len {
+            let val = Scalar::from_u16(
+                src.next().expect("iterator was shorter than it said it would be"),
+            );
+            let offset_ptr = ptr.offset(Size::from_bytes(idx) * 2, &tcx)?; // `Size` multiplication
+            allocation.write_scalar(&tcx, offset_ptr, val.into(), Size::from_bytes(2))?;
+        }
+        src.next().expect_none("iterator was longer than it said it would be");
+        Ok(())
+    }
+
+    /// Expects the caller to have checked bounds and alignment.
+    pub fn copy(
+        &mut self,
+        src: Pointer<M::PointerTag>,
+        dest: Pointer<M::PointerTag>,
+        size: Size,
+        nonoverlapping: bool,
+    ) -> InterpResult<'tcx> {
+        self.copy_repeatedly(src, dest, size, 1, nonoverlapping)
+    }
+
+    /// Expects the caller to have checked bounds and alignment.
+    pub fn copy_repeatedly(
+        &mut self,
+        src: Pointer<M::PointerTag>,
+        dest: Pointer<M::PointerTag>,
+        size: Size,
+        length: u64,
+        nonoverlapping: bool,
+    ) -> InterpResult<'tcx> {
+        // First copy the relocations to a temporary buffer, because
+        // `get_bytes_mut` will clear the relocations, which is correct,
+        // since we don't want to keep any relocations at the target.
+        // (`get_bytes_with_uninit_and_ptr` below checks that there are no
+        // relocations overlapping the edges; those would not be handled correctly).
+        let relocations =
+            self.get_raw(src.alloc_id)?.prepare_relocation_copy(self, src, size, dest, length);
+
+        let tcx = self.tcx;
+
+        // This checks relocation edges on the src.
+        let src_bytes =
+            self.get_raw(src.alloc_id)?.get_bytes_with_uninit_and_ptr(&tcx, src, size)?.as_ptr();
+        let dest_bytes =
+            self.get_raw_mut(dest.alloc_id)?.get_bytes_mut(&tcx, dest, size * length)?; // `Size` multiplication
+
+        // If `dest_bytes` is empty we just optimize to not run anything for ZSTs.
+        // See #67539.
+        if dest_bytes.is_empty() {
+            return Ok(());
+        }
+
+        let dest_bytes = dest_bytes.as_mut_ptr();
+
+        // Prepare a copy of the initialization mask.
+        let compressed = self.get_raw(src.alloc_id)?.compress_uninit_range(src, size);
+
+        if compressed.no_bytes_init() {
+            // Fast path: If all bytes are `uninit` then there is nothing to copy. The target range
+            // is marked as uninitialized but we otherwise omit changing the byte representation,
+            // which may be arbitrary for uninitialized bytes.
+            // This also avoids writing to the target bytes so that the backing allocation is never
+            // touched if the bytes stay uninitialized for the whole interpreter execution. On
+            // contemporary operating systems this can avoid physically allocating the page.
+            let dest_alloc = self.get_raw_mut(dest.alloc_id)?;
+            dest_alloc.mark_init(dest, size * length, false); // `Size` multiplication
+            dest_alloc.mark_relocation_range(relocations);
+            return Ok(());
+        }
+
+        // SAFE: The above indexing would have panicked if there weren't at least `size` bytes
+        // behind `src` and `dest`. Also, we use the overlapping-safe `ptr::copy` if `src` and
+        // `dest` could possibly overlap.
+        // The pointers above remain valid even if the `HashMap` table is moved around because they
+        // point into the `Vec` storing the bytes.
+        unsafe {
+            if src.alloc_id == dest.alloc_id {
+                if nonoverlapping {
+                    // `Size` additions
+                    if (src.offset <= dest.offset && src.offset + size > dest.offset)
+                        || (dest.offset <= src.offset && dest.offset + size > src.offset)
+                    {
+                        throw_ub_format!("copy_nonoverlapping called on overlapping ranges")
+                    }
+                }
+
+                for i in 0..length {
+                    ptr::copy(
+                        src_bytes,
+                        dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
+                        size.bytes_usize(),
+                    );
+                }
+            } else {
+                for i in 0..length {
+                    ptr::copy_nonoverlapping(
+                        src_bytes,
+                        dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
+                        size.bytes_usize(),
+                    );
+                }
+            }
+        }
+
+        // now fill in all the data
+        self.get_raw_mut(dest.alloc_id)?.mark_compressed_init_range(
+            &compressed,
+            dest,
+            size,
+            length,
+        );
+
+        // copy the relocations to the destination
+        self.get_raw_mut(dest.alloc_id)?.mark_relocation_range(relocations);
+
+        Ok(())
+    }
+}
+
+/// Machine pointer introspection.
+impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
+    pub fn force_ptr(
+        &self,
+        scalar: Scalar<M::PointerTag>,
+    ) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
+        match scalar {
+            Scalar::Ptr(ptr) => Ok(ptr),
+            _ => M::int_to_ptr(&self, scalar.to_machine_usize(self)?),
+        }
+    }
+
+    pub fn force_bits(
+        &self,
+        scalar: Scalar<M::PointerTag>,
+        size: Size,
+    ) -> InterpResult<'tcx, u128> {
+        match scalar.to_bits_or_ptr(size, self) {
+            Ok(bits) => Ok(bits),
+            Err(ptr) => Ok(M::ptr_to_int(&self, ptr)?.into()),
+        }
+    }
+}
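A few notes on the trickier parts of this file, each with a small standalone sketch.

The `check_offset_align` helper in `check_ptr_access_align` reports not just that an offset is misaligned, but how aligned it actually is: for a nonzero `offset`, the largest power of two dividing it is `1 << offset.trailing_zeros()`. The shift cannot overflow because the helper only takes that branch when `offset` is not a multiple of `align`, hence nonzero. A minimal sketch of the same check, using plain `u64`s instead of the interpreter's `Align` type:

    // Sketch of the alignment check; `align` must be a power of two,
    // as `Align` guarantees in the real code.
    fn check_offset_align(offset: u64, align: u64) -> Result<(), String> {
        assert!(align.is_power_of_two());
        if offset % align == 0 {
            Ok(())
        } else {
            // `offset` is not divisible by `align`, so it is nonzero and
            // `trailing_zeros() < 64`; the shift cannot overflow.
            let has = 1u64 << offset.trailing_zeros();
            Err(format!("alignment check failed: has {}, required {}", has, align))
        }
    }

    fn main() {
        assert!(check_offset_align(24, 8).is_ok()); // 24 = 8 * 3
        // 4 is the largest power of two dividing 12.
        assert_eq!(
            check_offset_align(12, 8).unwrap_err(),
            "alignment check failed: has 4, required 8"
        );
    }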
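The "funny error type" in `get_raw` also deserves a sketch. Because `alloc_map.get_or` takes a closure, the lookup cannot simply early-return a borrowed global allocation, so the borrow is smuggled out through the error channel as `Err(Ok(&alloc))`, leaving the map unmutated, while a real error travels as `Err(Err(e))`. Below is a self-contained sketch with hypothetical simplified types (`u32` IDs, `String` allocations, and a `get_or` helper standing in for `M::MemoryMap::get_or`); the real code additionally has a `Cow::Owned` case that does get inserted into the map:

    use std::collections::HashMap;

    // Stand-in for `MemoryMap::get_or`: return the existing entry, or run
    // `fallback`; `Ok(v)` is inserted, `Err(e)` is passed through unchanged.
    fn get_or<'m, E>(
        map: &'m mut HashMap<u32, String>,
        id: u32,
        fallback: impl FnOnce() -> Result<String, E>,
    ) -> Result<&'m String, E> {
        if !map.contains_key(&id) {
            map.insert(id, fallback()?);
        }
        Ok(map.get(&id).unwrap())
    }

    // The "error" type is itself a Result: a borrowed global rides the error
    // channel (`Err(Ok(g))`) so the map is not mutated; a real error is
    // `Err(Err(msg))`. The caller unpacks the three cases afterwards.
    fn get_raw<'a>(
        map: &'a mut HashMap<u32, String>,
        globals: &'a HashMap<u32, String>,
        id: u32,
    ) -> Result<&'a String, String> {
        let r: Result<&'a String, Result<&'a String, String>> =
            get_or(map, id, move || match globals.get(&id) {
                Some(g) => Err(Ok(g)), // cheap borrow, no insertion
                None => Err(Err(format!("no allocation with id {}", id))),
            });
        match r {
            Ok(local) => Ok(local),
            Err(Ok(global)) => Ok(global),
            Err(Err(e)) => Err(e),
        }
    }

    fn main() {
        let mut locals = HashMap::new();
        locals.insert(1, "local alloc".to_string());
        let mut globals = HashMap::new();
        globals.insert(2, "global alloc".to_string());
        assert_eq!(get_raw(&mut locals, &globals, 2).unwrap(), "global alloc");
        assert!(locals.get(&2).is_none()); // the borrow did NOT copy it into the map
        assert!(get_raw(&mut locals, &globals, 3).is_err());
    }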
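`leak_report` is a plain worklist reachability pass: seed the worklist with all global-kind allocations plus `static_roots`, follow relocations (the pointers stored inside each allocation), and report whatever was never reached. A minimal sketch with `u64` IDs and an adjacency map standing in for `alloc.relocations()`:

    use std::collections::{HashMap, HashSet};

    // `roots` plays the role of the global allocations plus `static_roots`;
    // everything not reachable from them is reported as a leak.
    fn find_leaks(edges: &HashMap<u64, Vec<u64>>, roots: &[u64]) -> Vec<u64> {
        let mut reachable = HashSet::new();
        let mut todo: Vec<u64> = roots.to_vec();
        while let Some(id) = todo.pop() {
            // `insert` returns true only the first time we see `id`, so each
            // allocation's outgoing pointers are queued exactly once.
            if reachable.insert(id) {
                if let Some(targets) = edges.get(&id) {
                    todo.extend(targets);
                }
            }
        }
        edges.keys().copied().filter(|id| !reachable.contains(id)).collect()
    }

    fn main() {
        let mut edges = HashMap::new();
        edges.insert(1, vec![2]); // root 1 points to 2
        edges.insert(2, vec![]);
        edges.insert(3, vec![3]); // 3 only points to itself: unreachable, leaked
        assert_eq!(find_leaks(&edges, &[1]), vec![3]);
    }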
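Finally, the overlap test guarding `ptr::copy_nonoverlapping` in `copy_repeatedly` checks two equal-length ranges: `[src, src+size)` and `[dest, dest+size)` overlap iff whichever range starts first extends strictly past the start of the other. A sketch over plain offsets:

    fn ranges_overlap(src: u64, dest: u64, size: u64) -> bool {
        (src <= dest && src + size > dest) || (dest <= src && dest + size > src)
    }

    fn main() {
        assert!(!ranges_overlap(0, 8, 8)); // [0,8) and [8,16) merely touch
        assert!(ranges_overlap(0, 7, 8));  // [0,8) and [7,15) share byte 7
        assert!(ranges_overlap(4, 0, 8));  // dest starts first, src inside it
    }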
