about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/tools/miri/src/intptrcast.rs221
-rw-r--r--src/tools/miri/src/lib.rs2
-rw-r--r--src/tools/miri/src/machine.rs10
3 files changed, 129 insertions, 104 deletions
diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs
index 0bdea157633..ab6a256f714 100644
--- a/src/tools/miri/src/intptrcast.rs
+++ b/src/tools/miri/src/intptrcast.rs
@@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;
 
 #[derive(Clone, Debug)]
 pub struct GlobalStateInner {
-    /// This is used as a map between the address of each allocation and its `AllocId`.
-    /// It is always sorted
+    /// This is used as a map between the address of each allocation and its `AllocId`. It is always
+    /// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
+    /// base address, and we need to find the `AllocId` it belongs to.
+    /// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
     int_to_ptr_map: Vec<(u64, AllocId)>,
     /// The base address for each allocation.  We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
@@ -62,10 +64,21 @@ impl GlobalStateInner {
     }
 }
 
-impl<'mir, 'tcx> GlobalStateInner {
+/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
+/// of `align` that is larger than or equal to `addr`
+fn align_addr(addr: u64, align: u64) -> u64 {
+    match addr % align {
+        0 => addr,
+        rem => addr.checked_add(align).unwrap() - rem,
+    }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     // Returns the exposed `AllocId` that corresponds to the specified addr,
     // or `None` if the addr is out of bounds
-    fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
+    fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
+        let ecx = self.eval_context_ref();
         let global_state = ecx.machine.intptrcast.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);
 
@@ -82,94 +95,40 @@ impl<'mir, 'tcx> GlobalStateInner {
                 let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
                 // This never overflows because `addr >= glb`
                 let offset = addr - glb;
-                // If the offset exceeds the size of the allocation, don't use this `alloc_id`.
+                // We require this to be strict in-bounds of the allocation. This arm is only
+                // entered for addresses that are not the base address, so even zero-sized
+                // allocations will get recognized at their base address -- but all other
+                // allocations will *not* be recognized at their "end" address.
                 let size = ecx.get_alloc_info(alloc_id).0;
-                if offset <= size.bytes() { Some(alloc_id) } else { None }
+                if offset < size.bytes() { Some(alloc_id) } else { None }
             }
         }?;
 
-        // We only use this provenance if it has been exposed, *and* is still live.
+        // We only use this provenance if it has been exposed.
         if global_state.exposed.contains(&alloc_id) {
-            let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
-            match kind {
-                AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
-                    return Some(alloc_id);
-                }
-                AllocKind::Dead => {}
-            }
-        }
-
-        None
-    }
-
-    pub fn expose_ptr(
-        ecx: &mut MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-        tag: BorTag,
-    ) -> InterpResult<'tcx> {
-        let global_state = ecx.machine.intptrcast.get_mut();
-        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
-        if global_state.provenance_mode != ProvenanceMode::Strict {
-            trace!("Exposing allocation id {alloc_id:?}");
-            global_state.exposed.insert(alloc_id);
-            if ecx.machine.borrow_tracker.is_some() {
-                ecx.expose_tag(alloc_id, tag)?;
-            }
-        }
-        Ok(())
-    }
-
-    pub fn ptr_from_addr_cast(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        addr: u64,
-    ) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
-        trace!("Casting {:#x} to a pointer", addr);
-
-        // Potentially emit a warning.
-        let global_state = ecx.machine.intptrcast.borrow();
-        match global_state.provenance_mode {
-            ProvenanceMode::Default => {
-                // The first time this happens at a particular location, print a warning.
-                thread_local! {
-                    // `Span` is non-`Send`, so we use a thread-local instead.
-                    static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
-                }
-                PAST_WARNINGS.with_borrow_mut(|past_warnings| {
-                    let first = past_warnings.is_empty();
-                    if past_warnings.insert(ecx.cur_span()) {
-                        // Newly inserted, so first time we see this span.
-                        ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
-                    }
-                });
-            }
-            ProvenanceMode::Strict => {
-                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
-            }
-            ProvenanceMode::Permissive => {}
+            // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
+            debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
+            Some(alloc_id)
+        } else {
+            None
         }
-
-        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
-        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
-        // *then* do a memory access. So the allocation that the pointer happens to point to on a
-        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
-        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
-        // allocation it might be referencing.
-        Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
     }
 
-    fn alloc_base_addr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        alloc_id: AllocId,
-    ) -> InterpResult<'tcx, u64> {
+    fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
+        let ecx = self.eval_context_ref();
         let mut global_state = ecx.machine.intptrcast.borrow_mut();
         let global_state = &mut *global_state;
 
         Ok(match global_state.base_addr.entry(alloc_id) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
-                // There is nothing wrong with a raw pointer being cast to an integer only after
-                // it became dangling.  Hence we allow dead allocations.
-                let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
+                let (size, align, kind) = ecx.get_alloc_info(alloc_id);
+                // This is either called immediately after allocation (and then cached), or when
+                // adjusting `tcx` pointers (which never get freed). So assert that we are looking
+                // at a live allocation. This also ensures that we never re-assign an address to an
+                // allocation that previously had an address, but then was freed and the address
+                // information was removed.
+                assert!(!matches!(kind, AllocKind::Dead));
 
                 // This allocation does not have a base address yet, pick one.
                 // Leave some space to the previous allocation, to give it some chance to be less aligned.
@@ -183,7 +142,7 @@ impl<'mir, 'tcx> GlobalStateInner {
                     .next_base_addr
                     .checked_add(slack)
                     .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
-                let base_addr = Self::align_addr(base_addr, align.bytes());
+                let base_addr = align_addr(base_addr, align.bytes());
                 entry.insert(base_addr);
                 trace!(
                     "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
@@ -205,6 +164,7 @@ impl<'mir, 'tcx> GlobalStateInner {
                 if global_state.next_base_addr > ecx.target_usize_max() {
                     throw_exhaust!(AddressSpaceFull);
                 }
+                // Also maintain the opposite mapping in `int_to_ptr_map`.
                 // Given that `next_base_addr` increases in each allocation, pushing the
                 // corresponding tuple keeps `int_to_ptr_map` sorted
                 global_state.int_to_ptr_map.push((base_addr, alloc_id));
@@ -213,15 +173,71 @@ impl<'mir, 'tcx> GlobalStateInner {
             }
         })
     }
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+    fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
+        let ecx = self.eval_context_mut();
+        let global_state = ecx.machine.intptrcast.get_mut();
+        // In strict mode, we don't need this, so we can save some cycles by not tracking it.
+        if global_state.provenance_mode != ProvenanceMode::Strict {
+            trace!("Exposing allocation id {alloc_id:?}");
+            global_state.exposed.insert(alloc_id);
+            if ecx.machine.borrow_tracker.is_some() {
+                ecx.expose_tag(alloc_id, tag)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
+        trace!("Casting {:#x} to a pointer", addr);
+
+        let ecx = self.eval_context_ref();
+        let global_state = ecx.machine.intptrcast.borrow();
+
+        // Potentially emit a warning.
+        match global_state.provenance_mode {
+            ProvenanceMode::Default => {
+                // The first time this happens at a particular location, print a warning.
+                thread_local! {
+                    // `Span` is non-`Send`, so we use a thread-local instead.
+                    static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
+                }
+                PAST_WARNINGS.with_borrow_mut(|past_warnings| {
+                    let first = past_warnings.is_empty();
+                    if past_warnings.insert(ecx.cur_span()) {
+                        // Newly inserted, so first time we see this span.
+                        ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
+                    }
+                });
+            }
+            ProvenanceMode::Strict => {
+                throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
+            }
+            ProvenanceMode::Permissive => {}
+        }
+
+        // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
+        // completely legal to do a cast and then `wrapping_offset` to another allocation and only
+        // *then* do a memory access. So the allocation that the pointer happens to point to on a
+        // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
+        // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
+        // allocation it might be referencing.
+        Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
+    }
 
     /// Convert a relative (tcx) pointer to a Miri pointer.
-    pub fn ptr_from_rel_ptr(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
+    fn ptr_from_rel_ptr(
+        &self,
         ptr: Pointer<AllocId>,
         tag: BorTag,
     ) -> InterpResult<'tcx, Pointer<Provenance>> {
+        let ecx = self.eval_context_ref();
+
         let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
+        let base_addr = ecx.addr_from_alloc_id(alloc_id)?;
 
         // Add offset with the right kind of pointer-overflowing arithmetic.
         let dl = ecx.data_layout();
@@ -231,22 +247,21 @@ impl<'mir, 'tcx> GlobalStateInner {
 
     /// When a pointer is used for a memory access, this computes where in which allocation the
     /// access is going.
-    pub fn ptr_get_alloc(
-        ecx: &MiriInterpCx<'mir, 'tcx>,
-        ptr: Pointer<Provenance>,
-    ) -> Option<(AllocId, Size)> {
+    fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
+        let ecx = self.eval_context_ref();
+
         let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
 
         let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
             alloc_id
         } else {
             // A wildcard pointer.
-            GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
+            ecx.alloc_id_from_addr(addr.bytes())?
         };
 
         // This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
-        // must have been called in the past.
-        let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
+        // must have been called in the past, so we can just look up the address in the map.
+        let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();
 
         // Wrapping "addr - base_addr"
         #[allow(clippy::cast_possible_wrap)] // we want to wrap here
@@ -256,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
             Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
         ))
     }
+}
 
-    /// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
-    /// of `align` that is larger or equal to `addr`
-    fn align_addr(addr: u64, align: u64) -> u64 {
-        match addr % align {
-            0 => addr,
-            rem => addr.checked_add(align).unwrap() - rem,
-        }
+impl GlobalStateInner {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
+        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
+        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
+        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
+        // access to determine the allocation ID and offset -- and there can still be pointers with
+        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
+        // `None` only if the pointer truly has no provenance (this ensures consistent error
+        // messages).
+        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
+        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
+        // returns a dead allocation.
+        self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
+        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
+        // `alloc_id_from_addr` any more.
+        self.exposed.remove(&dead_id);
     }
 }
 
@@ -273,7 +298,7 @@ mod tests {
 
     #[test]
     fn test_align_addr() {
-        assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
-        assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
+        assert_eq!(align_addr(37, 4), 40);
+        assert_eq!(align_addr(44, 4), 44);
     }
 }
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index f1d8ce01bc2..68b9164dec0 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -117,7 +117,7 @@ pub use crate::eval::{
     create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
 };
 pub use crate::helpers::EvalContextExt as _;
-pub use crate::intptrcast::ProvenanceMode;
+pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
 pub use crate::machine::{
     AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
     PrimitiveLayouts, Provenance, ProvenanceExtra,
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 0ade43d4a8d..d5775912eab 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
             // Value does not matter, SB is disabled
             BorTag::default()
         };
-        intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
+        ecx.ptr_from_rel_ptr(ptr, tag)
     }
 
     /// Called on `usize as ptr` casts.
@@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         addr: u64,
     ) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
-        intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
+        ecx.ptr_from_addr_cast(addr)
     }
 
     /// Called on `ptr as usize` casts.
@@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ptr: Pointer<Self::Provenance>,
     ) -> InterpResult<'tcx> {
         match ptr.provenance {
-            Provenance::Concrete { alloc_id, tag } =>
-                intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
+            Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
             Provenance::Wildcard => {
                 // No need to do anything for wildcard pointers as
                 // their provenances have already been previously exposed.
@@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         ecx: &MiriInterpCx<'mir, 'tcx>,
         ptr: Pointer<Self::Provenance>,
     ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
-        let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
+        let rel = ecx.ptr_get_alloc(ptr);
 
         rel.map(|(alloc_id, size)| {
             let tag = match ptr.provenance {
@@ -1263,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
+        machine.intptrcast.get_mut().free_alloc_id(alloc_id);
         Ok(())
     }