interpret: move the native call preparation logic into Miri

author: Ralf Jung <post@ralfj.de> 2025-07-02 14:16:28 +0200
committer: Ralf Jung <post@ralfj.de> 2025-07-02 14:25:11 +0200
commit: de1278bd16fdef81a2b93f43d4ae9159da716d95 (patch)
tree: ba4691722cddf571a55e74346ba33124b567b3d1
parent: f51c9870bab634afb9e7a262b6ca7816bb9e940d (diff)
download: rust-de1278bd16fdef81a2b93f43d4ae9159da716d95.tar.gz
rust-de1278bd16fdef81a2b93f43d4ae9159da716d95.zip
5 files changed, 67 insertions, 61 deletions
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index 3b36bb85985..ff822b52a8d 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -655,7 +655,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     /// The caller is responsible for calling the access hooks!
     ///
     /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
-    fn get_alloc_raw(
+    pub fn get_alloc_raw(
         &self,
         id: AllocId,
     ) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra, M::Bytes>> {
@@ -757,7 +757,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     ///
     /// Also returns a ptr to `self.extra` so that the caller can use it in parallel with the
     /// allocation.
-    fn get_alloc_raw_mut(
+    ///
+    /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
+    pub fn get_alloc_raw_mut(
         &mut self,
         id: AllocId,
     ) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra, M::Bytes>, &mut M)> {
@@ -976,15 +978,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         interp_ok(())
     }
 
-    /// Handle the effect an FFI call might have on the state of allocations.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    ///
-    /// The allocations in `ids` are assumed to be already exposed.
-    pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
+    /// Visit all allocations reachable from the given start set, by recursively traversing the
+    /// provenance information of those allocations.
+    pub fn visit_reachable_allocs(
+        &mut self,
+        start: Vec<AllocId>,
+        mut visit: impl FnMut(&mut Self, AllocId, &AllocInfo) -> InterpResult<'tcx>,
+    ) -> InterpResult<'tcx> {
         let mut done = FxHashSet::default();
-        let mut todo = ids;
+        let mut todo = start;
         while let Some(id) = todo.pop() {
             if !done.insert(id) {
                 // We already saw this allocation before, don't process it again.
@@ -992,31 +994,20 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             }
             let info = self.get_alloc_info(id);
 
-            // If there is no data behind this pointer, skip this.
-            if !matches!(info.kind, AllocKind::LiveData) {
-                continue;
-            }
-
-            // Expose all provenances in this allocation, and add them to `todo`.
-            let alloc = self.get_alloc_raw(id)?;
-            for prov in alloc.provenance().provenances() {
-                M::expose_provenance(self, prov)?;
-                if let Some(id) = prov.get_alloc_id() {
-                    todo.push(id);
+            // Recurse, if there is data here.
+            // Do this *before* invoking the callback, as the callback might mutate the
+            // allocation and e.g. replace all provenance by wildcards!
+            if matches!(info.kind, AllocKind::LiveData) {
+                let alloc = self.get_alloc_raw(id)?;
+                for prov in alloc.provenance().provenances() {
+                    if let Some(id) = prov.get_alloc_id() {
+                        todo.push(id);
+                    }
                 }
             }
-            // Also expose the provenance of the interpreter-level allocation, so it can
-            // be read by FFI. The `black_box` is defensive programming as LLVM likes
-            // to (incorrectly) optimize away ptr2int casts whose result is unused.
-            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-
-            // Prepare for possible write from native code if mutable.
-            if info.mutbl.is_mut() {
-                self.get_alloc_raw_mut(id)?
-                    .0
-                    .prepare_for_native_write()
-                    .map_err(|e| e.to_interp_error(id))?;
-            }
+
+            // Call the callback.
+            visit(self, id, &info)?;
         }
         interp_ok(())
     }
@@ -1073,7 +1064,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             todo.extend(static_roots(self));
             while let Some(id) = todo.pop() {
                 if reachable.insert(id) {
-                    // This is a new allocation, add the allocation it points to `todo`.
+                    // This is a new allocation, add the allocations it points to `todo`.
+                    // We only need to care about `alloc_map` memory here, as entirely unchanged
+                    // global memory cannot point to memory relevant for the leak check.
                     if let Some((_, alloc)) = self.memory.alloc_map.get(id) {
                         todo.extend(
                             alloc.provenance().provenances().filter_map(|prov| prov.get_alloc_id()),
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 4198b198ab1..d2cadc96b63 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -799,7 +799,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     /// Initialize all previously uninitialized bytes in the entire allocation, and set
     /// provenance of everything to `Wildcard`. Before calling this, make sure all
     /// provenance in this allocation is exposed!
-    pub fn prepare_for_native_write(&mut self) -> AllocResult {
+    pub fn prepare_for_native_access(&mut self) {
         let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
         // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
         for chunk in self.init_mask.range_as_init_chunks(full_range) {
@@ -814,13 +814,6 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
 
         // Set provenance of all bytes to wildcard.
         self.provenance.write_wildcards(self.len());
-
-        // Also expose the provenance of the interpreter-level allocation, so it can
-        // be written by FFI. The `black_box` is defensive programming as LLVM likes
-        // to (incorrectly) optimize away ptr2int casts whose result is unused.
-        std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());
-
-        Ok(())
     }
 
     /// Remove all provenance in the given memory range.
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
index c9525df1f79..63608947eb3 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
@@ -120,7 +120,7 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
         }
     }
 
-    /// Check if here is ptr-sized provenance at the given index.
+    /// Check if there is ptr-sized provenance at the given index.
     /// Does not mean anything for bytewise provenance! But can be useful as an optimization.
     pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
         self.ptrs.get(&offset).copied()
diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index 1796120cf8a..3cc38fa087c 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -466,17 +466,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         Some((alloc_id, Size::from_bytes(rel_offset)))
     }
 
-    /// Prepare all exposed memory for a native call.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
-        let this = self.eval_context_mut();
-        // We need to make a deep copy of this list, but it's fine; it also serves as scratch space
-        // for the search within `prepare_for_native_call`.
-        let exposed: Vec<AllocId> =
-            this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
-        this.prepare_for_native_call(exposed)
+    /// Return a list of all exposed allocations.
+    fn exposed_allocs(&self) -> Vec<AllocId> {
+        let this = self.eval_context_ref();
+        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
     }
 }
 
diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs
index 9c659f65e50..9b30d8ce78b 100644
--- a/src/tools/miri/src/shims/native_lib/mod.rs
+++ b/src/tools/miri/src/shims/native_lib/mod.rs
@@ -198,7 +198,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
         for arg in args.iter() {
             if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
-                throw_unsup_format!("only scalar argument types are support for native calls")
+                throw_unsup_format!("only scalar argument types are supported for native calls")
             }
             let imm = this.read_immediate(arg)?;
             libffi_args.push(imm_to_carg(&imm, this)?);
@@ -224,16 +224,42 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.expose_provenance(prov)?;
             }
         }
-
-        // Prepare all exposed memory.
-        this.prepare_exposed_for_native_call()?;
-
-        // Convert them to `libffi::high::Arg` type.
+        // Convert arguments to `libffi::high::Arg` type.
         let libffi_args = libffi_args
             .iter()
             .map(|arg| arg.arg_downcast())
             .collect::<Vec<libffi::high::Arg<'_>>>();
 
+        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
+        // pointer was passed as argument).
+        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
+            // If there is no data behind this pointer, skip this.
+            if !matches!(info.kind, AllocKind::LiveData) {
+                return interp_ok(());
+            }
+            // It's okay to get raw access, what we do does not correspond to any actual
+            // AM operation, it just approximates the state to account for the native call.
+            let alloc = this.get_alloc_raw(alloc_id)?;
+            // Also expose the provenance of the interpreter-level allocation, so it can
+            // be read by FFI. The `black_box` is defensive programming as LLVM likes
+            // to (incorrectly) optimize away ptr2int casts whose result is unused.
+            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
+            // Expose all provenances in this allocation, since the native code can do $whatever.
+            for prov in alloc.provenance().provenances() {
+                this.expose_provenance(prov)?;
+            }
+
+            // Prepare for possible write from native code if mutable.
+            if info.mutbl.is_mut() {
+                let alloc = &mut this.get_alloc_raw_mut(alloc_id)?.0;
+                alloc.prepare_for_native_access();
+                // Also expose *mutable* provenance for the interpreter-level allocation.
+                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
+            }
+
+            interp_ok(())
+        })?;
+
         // Call the function and store output, depending on return type in the function signature.
         let (ret, maybe_memevents) =
             this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
@@ -321,7 +347,8 @@ fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'
             CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
         ty::RawPtr(..) => {
             let s = v.to_scalar().to_pointer(cx)?.addr();
-            // This relies on the `expose_provenance` in `prepare_for_native_call`.
+            // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
+            // above.
             CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
         }
         _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
author	Ralf Jung <post@ralfj.de>	2025-07-02 14:16:28 +0200
committer	Ralf Jung <post@ralfj.de>	2025-07-02 14:25:11 +0200
commit	de1278bd16fdef81a2b93f43d4ae9159da716d95 (patch)
tree	ba4691722cddf571a55e74346ba33124b567b3d1
parent	f51c9870bab634afb9e7a262b6ca7816bb9e940d (diff)
download	rust-de1278bd16fdef81a2b93f43d4ae9159da716d95.tar.gz rust-de1278bd16fdef81a2b93f43d4ae9159da716d95.zip