Rollup merge of #143324 - RalfJung:native-call-prep, r=oli-obk

interpret: move the native call preparation logic into Miri `@nia-e` has to do a bunch of changes to this logic for her native call ptrace work, and it's getting annoying that the logic is split between Miri and rustc. So this moves the logic to Miri, keeping just the generic traversal part in rustc. It is unfortunate that this means we have to expose `get_alloc_raw`/`get_alloc_raw_mut`... I hope the function name is scary enough to reduce the risk of misuse. r? `@oli-obk`
author: Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com> 2025-07-03 05:21:35 +0200
committer: GitHub <noreply@github.com> 2025-07-03 05:21:35 +0200
commit: 7e600de3c8e03e57fba4099fc8751fe0eb7a81b1 (patch)
tree: a75eabad76999cc639481e441d87a622e76c4bcd
parent: f0007547618dd0c3adbc49026f8a93f8d8d8c647 (diff)
parent: de1278bd16fdef81a2b93f43d4ae9159da716d95 (diff)
download: rust-7e600de3c8e03e57fba4099fc8751fe0eb7a81b1.tar.gz
rust-7e600de3c8e03e57fba4099fc8751fe0eb7a81b1.zip
5 files changed, 67 insertions, 61 deletions
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index 3b36bb85985..ff822b52a8d 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -655,7 +655,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     /// The caller is responsible for calling the access hooks!
     ///
     /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
-    fn get_alloc_raw(
+    pub fn get_alloc_raw(
         &self,
         id: AllocId,
     ) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra, M::Bytes>> {
@@ -757,7 +757,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     ///
     /// Also returns a ptr to `self.extra` so that the caller can use it in parallel with the
     /// allocation.
-    fn get_alloc_raw_mut(
+    ///
+    /// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
+    pub fn get_alloc_raw_mut(
         &mut self,
         id: AllocId,
     ) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra, M::Bytes>, &mut M)> {
@@ -976,15 +978,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         interp_ok(())
     }
 
-    /// Handle the effect an FFI call might have on the state of allocations.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    ///
-    /// The allocations in `ids` are assumed to be already exposed.
-    pub fn prepare_for_native_call(&mut self, ids: Vec<AllocId>) -> InterpResult<'tcx> {
+    /// Visit all allocations reachable from the given start set, by recursively traversing the
+    /// provenance information of those allocations.
+    pub fn visit_reachable_allocs(
+        &mut self,
+        start: Vec<AllocId>,
+        mut visit: impl FnMut(&mut Self, AllocId, &AllocInfo) -> InterpResult<'tcx>,
+    ) -> InterpResult<'tcx> {
         let mut done = FxHashSet::default();
-        let mut todo = ids;
+        let mut todo = start;
         while let Some(id) = todo.pop() {
             if !done.insert(id) {
                 // We already saw this allocation before, don't process it again.
@@ -992,31 +994,20 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             }
             let info = self.get_alloc_info(id);
 
-            // If there is no data behind this pointer, skip this.
-            if !matches!(info.kind, AllocKind::LiveData) {
-                continue;
-            }
-
-            // Expose all provenances in this allocation, and add them to `todo`.
-            let alloc = self.get_alloc_raw(id)?;
-            for prov in alloc.provenance().provenances() {
-                M::expose_provenance(self, prov)?;
-                if let Some(id) = prov.get_alloc_id() {
-                    todo.push(id);
+            // Recurse, if there is data here.
+            // Do this *before* invoking the callback, as the callback might mutate the
+            // allocation and e.g. replace all provenance by wildcards!
+            if matches!(info.kind, AllocKind::LiveData) {
+                let alloc = self.get_alloc_raw(id)?;
+                for prov in alloc.provenance().provenances() {
+                    if let Some(id) = prov.get_alloc_id() {
+                        todo.push(id);
+                    }
                 }
             }
-            // Also expose the provenance of the interpreter-level allocation, so it can
-            // be read by FFI. The `black_box` is defensive programming as LLVM likes
-            // to (incorrectly) optimize away ptr2int casts whose result is unused.
-            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
-
-            // Prepare for possible write from native code if mutable.
-            if info.mutbl.is_mut() {
-                self.get_alloc_raw_mut(id)?
-                    .0
-                    .prepare_for_native_write()
-                    .map_err(|e| e.to_interp_error(id))?;
-            }
+
+            // Call the callback.
+            visit(self, id, &info)?;
         }
         interp_ok(())
     }
@@ -1073,7 +1064,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
             todo.extend(static_roots(self));
             while let Some(id) = todo.pop() {
                 if reachable.insert(id) {
-                    // This is a new allocation, add the allocation it points to `todo`.
+                    // This is a new allocation, add the allocations it points to `todo`.
+                    // We only need to care about `alloc_map` memory here, as entirely unchanged
+                    // global memory cannot point to memory relevant for the leak check.
                     if let Some((_, alloc)) = self.memory.alloc_map.get(id) {
                         todo.extend(
                             alloc.provenance().provenances().filter_map(|prov| prov.get_alloc_id()),
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 4198b198ab1..d2cadc96b63 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -799,7 +799,7 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
     /// Initialize all previously uninitialized bytes in the entire allocation, and set
     /// provenance of everything to `Wildcard`. Before calling this, make sure all
     /// provenance in this allocation is exposed!
-    pub fn prepare_for_native_write(&mut self) -> AllocResult {
+    pub fn prepare_for_native_access(&mut self) {
         let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
         // Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
         for chunk in self.init_mask.range_as_init_chunks(full_range) {
@@ -814,13 +814,6 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
 
         // Set provenance of all bytes to wildcard.
         self.provenance.write_wildcards(self.len());
-
-        // Also expose the provenance of the interpreter-level allocation, so it can
-        // be written by FFI. The `black_box` is defensive programming as LLVM likes
-        // to (incorrectly) optimize away ptr2int casts whose result is unused.
-        std::hint::black_box(self.get_bytes_unchecked_raw_mut().expose_provenance());
-
-        Ok(())
     }
 
     /// Remove all provenance in the given memory range.
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
index c9525df1f79..63608947eb3 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs
@@ -120,7 +120,7 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
         }
     }
 
-    /// Check if here is ptr-sized provenance at the given index.
+    /// Check if there is ptr-sized provenance at the given index.
     /// Does not mean anything for bytewise provenance! But can be useful as an optimization.
     pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
         self.ptrs.get(&offset).copied()
diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index 1796120cf8a..3cc38fa087c 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -466,17 +466,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         Some((alloc_id, Size::from_bytes(rel_offset)))
     }
 
-    /// Prepare all exposed memory for a native call.
-    /// This overapproximates the modifications which external code might make to memory:
-    /// We set all reachable allocations as initialized, mark all reachable provenances as exposed
-    /// and overwrite them with `Provenance::WILDCARD`.
-    fn prepare_exposed_for_native_call(&mut self) -> InterpResult<'tcx> {
-        let this = self.eval_context_mut();
-        // We need to make a deep copy of this list, but it's fine; it also serves as scratch space
-        // for the search within `prepare_for_native_call`.
-        let exposed: Vec<AllocId> =
-            this.machine.alloc_addresses.get_mut().exposed.iter().copied().collect();
-        this.prepare_for_native_call(exposed)
+    /// Return a list of all exposed allocations.
+    fn exposed_allocs(&self) -> Vec<AllocId> {
+        let this = self.eval_context_ref();
+        this.machine.alloc_addresses.borrow().exposed.iter().copied().collect()
     }
 }
 
diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs
index 9c659f65e50..9b30d8ce78b 100644
--- a/src/tools/miri/src/shims/native_lib/mod.rs
+++ b/src/tools/miri/src/shims/native_lib/mod.rs
@@ -198,7 +198,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
         for arg in args.iter() {
             if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
-                throw_unsup_format!("only scalar argument types are support for native calls")
+                throw_unsup_format!("only scalar argument types are supported for native calls")
             }
             let imm = this.read_immediate(arg)?;
             libffi_args.push(imm_to_carg(&imm, this)?);
@@ -224,16 +224,42 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.expose_provenance(prov)?;
             }
         }
-
-        // Prepare all exposed memory.
-        this.prepare_exposed_for_native_call()?;
-
-        // Convert them to `libffi::high::Arg` type.
+        // Convert arguments to `libffi::high::Arg` type.
         let libffi_args = libffi_args
             .iter()
             .map(|arg| arg.arg_downcast())
             .collect::<Vec<libffi::high::Arg<'_>>>();
 
+        // Prepare all exposed memory (both previously exposed, and just newly exposed since a
+        // pointer was passed as argument).
+        this.visit_reachable_allocs(this.exposed_allocs(), |this, alloc_id, info| {
+            // If there is no data behind this pointer, skip this.
+            if !matches!(info.kind, AllocKind::LiveData) {
+                return interp_ok(());
+            }
+            // It's okay to get raw access, what we do does not correspond to any actual
+            // AM operation, it just approximates the state to account for the native call.
+            let alloc = this.get_alloc_raw(alloc_id)?;
+            // Also expose the provenance of the interpreter-level allocation, so it can
+            // be read by FFI. The `black_box` is defensive programming as LLVM likes
+            // to (incorrectly) optimize away ptr2int casts whose result is unused.
+            std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
+            // Expose all provenances in this allocation, since the native code can do $whatever.
+            for prov in alloc.provenance().provenances() {
+                this.expose_provenance(prov)?;
+            }
+
+            // Prepare for possible write from native code if mutable.
+            if info.mutbl.is_mut() {
+                let alloc = &mut this.get_alloc_raw_mut(alloc_id)?.0;
+                alloc.prepare_for_native_access();
+                // Also expose *mutable* provenance for the interpreter-level allocation.
+                std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
+            }
+
+            interp_ok(())
+        })?;
+
         // Call the function and store output, depending on return type in the function signature.
         let (ret, maybe_memevents) =
             this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
@@ -321,7 +347,8 @@ fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'
             CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
         ty::RawPtr(..) => {
             let s = v.to_scalar().to_pointer(cx)?.addr();
-            // This relies on the `expose_provenance` in `prepare_for_native_call`.
+            // This relies on the `expose_provenance` in the `visit_reachable_allocs` callback
+            // above.
             CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
         }
         _ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
author	Matthias Krüger <476013+matthiaskrgr@users.noreply.github.com>	2025-07-03 05:21:35 +0200
committer	GitHub <noreply@github.com>	2025-07-03 05:21:35 +0200
commit	7e600de3c8e03e57fba4099fc8751fe0eb7a81b1 (patch)
tree	a75eabad76999cc639481e441d87a622e76c4bcd
parent	f0007547618dd0c3adbc49026f8a93f8d8d8c647 (diff)
parent	de1278bd16fdef81a2b93f43d4ae9159da716d95 (diff)
download	rust-7e600de3c8e03e57fba4099fc8751fe0eb7a81b1.tar.gz rust-7e600de3c8e03e57fba4099fc8751fe0eb7a81b1.zip