author     bors <bors@rust-lang.org>  2024-08-13 04:32:34 +0000
committer  bors <bors@rust-lang.org>  2024-08-13 04:32:34 +0000
commit     591ecb88dffdb0f233e2fae74fd3d7c81d65ff0c (patch)
tree       cd2dcd52c2c7eb4220eb42027136efad1f4976c4 /src
parent     e9e27ab0cf3759e420eeaa6bab7c7d454d9c10cd (diff)
parent     4763d12207561037847cc7dea4b695f3c129f1d7 (diff)
Auto merge of #128742 - RalfJung:miri-vtable-uniqueness, r=saethlin
miri: make vtable addresses not globally unique

Miri currently gives each vtable a unique global address. That does not actually match reality, though: a real executable can contain several copies of the same vtable. So this PR enables Miri to generate different addresses for the same type-trait pair.
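To make the user-visible effect concrete, here is a small standalone program (not taken from this PR, though it mirrors the new `dyn-traits.rs` test further down; names are illustrative). Run under Miri with this change, the full wide-pointer comparison is no longer guaranteed to succeed even though the data pointers always agree:

```rust
use std::fmt::Display;
use std::ptr;

fn main() {
    let x: &i32 = &2;
    let x_wide: &dyn Display = x;

    // The data halves of the two pointers always agree.
    assert!(ptr::addr_eq(x as *const i32, x_wide as *const dyn Display));

    // But repeating the unsizing cast may pick a differently-salted vtable,
    // so the full wide-pointer comparison (address + vtable) can fail under Miri.
    let saw_second_vtable = (0..256).any(|_| !ptr::eq(x as &dyn Display, x_wide));
    println!("observed a second vtable for `i32 as dyn Display`: {saw_second_vtable}");
}
```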

To avoid generating an unbounded number of `AllocId`s (and consuming unbounded amounts of memory), we use the "salt" technique that we also already use for giving constants non-unique addresses: the cache is keyed on a "salt" value on top of the actually relevant key, and Miri picks a random salt (currently in the range `0..32`, see `ADDRS_PER_ANON_GLOBAL` below) each time it needs to choose an `AllocId` for one of these globals -- that means we'll get up to 32 different addresses for each vtable. The salt scheme is integrated into the global allocation deduplication logic in `tcx`, and is also used for functions and string literals. (So this also fixes the problem that casting the same function to a fn ptr over and over will consume unbounded memory.)
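For readers unfamiliar with the salting trick, here is a self-contained toy model of the idea (the names `SaltedInterner`, `SALTS`, and `intern` are invented for this sketch and are not the actual rustc/Miri API). The point is that keying the cache on `(key, salt)` bounds how many distinct ids one key can ever receive:

```rust
use std::collections::{HashMap, HashSet};

// Toy stand-in for the per-global address limit; the patch below uses
// ADDRS_PER_ANON_GLOBAL = 32 for the real thing.
const SALTS: usize = 8;

struct SaltedInterner {
    // Keyed on (key, salt) instead of just key, so each key maps to at most
    // SALTS distinct ids: neither fully deduplicated nor unbounded.
    cache: HashMap<(String, usize), u64>,
    next_id: u64,
}

impl SaltedInterner {
    fn new() -> Self {
        Self { cache: HashMap::new(), next_id: 0 }
    }

    // The caller picks `salt` in 0..SALTS (Miri picks it with its per-execution RNG).
    fn intern(&mut self, key: &str, salt: usize) -> u64 {
        let full_key = (key.to_owned(), salt % SALTS);
        if let Some(&id) = self.cache.get(&full_key) {
            return id;
        }
        let id = self.next_id;
        self.next_id += 1;
        self.cache.insert(full_key, id);
        id
    }
}

fn main() {
    let mut interner = SaltedInterner::new();
    // Mentioning the same global a huge number of times still creates at most
    // SALTS distinct "addresses" for it.
    let ids: HashSet<u64> =
        (0..1_000_000).map(|i| interner.intern("vtable: i32 as dyn Display", i)).collect();
    assert!(ids.len() <= SALTS);
    println!("distinct ids for one key: {}", ids.len());
}
```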

r? `@saethlin`
Fixes https://github.com/rust-lang/miri/issues/3737
Diffstat (limited to 'src')
-rw-r--r--  src/tools/miri/src/lib.rs                       |  1
-rw-r--r--  src/tools/miri/src/machine.rs                   | 53
-rw-r--r--  src/tools/miri/tests/pass/dyn-traits.rs         | 10
-rw-r--r--  src/tools/miri/tests/pass/function_pointers.rs  |  3
-rw-r--r--  src/tools/miri/tests/pass/rc.rs                 |  3
5 files changed, 63 insertions, 7 deletions
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index 2b3ae6df5de..966d38508f6 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -56,6 +56,7 @@ extern crate either;
 extern crate tracing;
 
 // The rustc crates we need
+extern crate rustc_attr;
 extern crate rustc_apfloat;
 extern crate rustc_ast;
 extern crate rustc_const_eval;
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 94598e7d2e3..754f8cf1381 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -11,6 +11,7 @@ use rand::rngs::StdRng;
 use rand::Rng;
 use rand::SeedableRng;
 
+use rustc_attr::InlineAttr;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 #[allow(unused)]
 use rustc_data_structures::static_assert_size;
@@ -23,6 +24,7 @@ use rustc_middle::{
         Instance, Ty, TyCtxt,
     },
 };
+use rustc_session::config::InliningThreshold;
 use rustc_span::def_id::{CrateNum, DefId};
 use rustc_span::{Span, SpanData, Symbol};
 use rustc_target::abi::{Align, Size};
@@ -47,10 +49,10 @@ pub const SIGRTMIN: i32 = 34;
 /// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`)
 pub const SIGRTMAX: i32 = 42;
 
-/// Each const has multiple addresses, but only this many. Since const allocations are never
-/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would
-/// produce unbounded memory usage.
-const ADDRS_PER_CONST: usize = 16;
+/// Each anonymous global (constant, vtable, function pointer, ...) has multiple addresses, but only
+/// this many. Since const allocations are never deallocated, choosing a new [`AllocId`] and thus
+/// base address for each evaluation would produce unbounded memory usage.
+const ADDRS_PER_ANON_GLOBAL: usize = 32;
 
 /// Extra data stored with each stack frame
 pub struct FrameExtra<'tcx> {
@@ -1372,7 +1374,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
             catch_unwind: None,
             timing,
             is_user_relevant: ecx.machine.is_user_relevant(&frame),
-            salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_CONST,
+            salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL,
         };
 
         Ok(frame.with_extra(extra))
@@ -1518,4 +1520,45 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
             Entry::Occupied(oe) => Ok(oe.get().clone()),
         }
     }
+
+    fn get_global_alloc_salt(
+        ecx: &InterpCx<'tcx, Self>,
+        instance: Option<ty::Instance<'tcx>>,
+    ) -> usize {
+        let unique = if let Some(instance) = instance {
+            // Functions cannot be identified by pointers, as asm-equal functions can get
+            // deduplicated by the linker (we set the "unnamed_addr" attribute for LLVM) and
+            // functions can be duplicated across crates. We thus generate a new `AllocId` for every
+            // mention of a function. This means that `main as fn() == main as fn()` is false, while
+            // `let x = main as fn(); x == x` is true. However, as a quality-of-life feature it can
+            // be useful to identify certain functions uniquely, e.g. for backtraces. So we identify
+            // whether codegen will actually emit duplicate functions. It does that when they have
+            // non-lifetime generics, or when they can be inlined. All other functions are given a
+            // unique address. This is not a stable guarantee! The `inline` attribute is a hint and
+            // cannot be relied upon for anything. But if we don't do this, the
+            // `__rust_begin_short_backtrace`/`__rust_end_short_backtrace` logic breaks and panic
+            // backtraces look terrible.
+            let is_generic = instance
+                .args
+                .into_iter()
+                .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_)));
+            let can_be_inlined = matches!(
+                ecx.tcx.sess.opts.unstable_opts.cross_crate_inline_threshold,
+                InliningThreshold::Always
+            ) || !matches!(
+                ecx.tcx.codegen_fn_attrs(instance.def_id()).inline,
+                InlineAttr::Never
+            );
+            !is_generic && !can_be_inlined
+        } else {
+            // Non-functions are never unique.
+            false
+        };
+        // Always use the same salt if the allocation is unique.
+        if unique {
+            CTFE_ALLOC_SALT
+        } else {
+            ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
+        }
+    }
 }
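The fn-pointer identity rule described in the comment inside `get_global_alloc_salt` above can be observed from a plain program. A minimal sketch (not part of the patch), run under Miri:

```rust
fn main() {
    // Two independent reifications of the same function may get different
    // addresses, so this comparison is not guaranteed to be true under Miri...
    let two_casts_equal = main as fn() == main as fn();
    println!("main as fn() == main as fn(): {two_casts_equal}");

    // ...but a single reified pointer is always equal to itself.
    let x = main as fn();
    assert!(x == x);
}
```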
diff --git a/src/tools/miri/tests/pass/dyn-traits.rs b/src/tools/miri/tests/pass/dyn-traits.rs
index 908d521a0d8..f6220120968 100644
--- a/src/tools/miri/tests/pass/dyn-traits.rs
+++ b/src/tools/miri/tests/pass/dyn-traits.rs
@@ -141,7 +141,17 @@ fn unsized_dyn_autoderef() {
 }
 */
 
+fn vtable_ptr_eq() {
+    use std::{fmt, ptr};
+
+    // We don't always get the same vtable when casting this to a wide pointer.
+    let x = &2;
+    let x_wide = x as &dyn fmt::Display;
+    assert!((0..256).any(|_| !ptr::eq(x as &dyn fmt::Display, x_wide)));
+}
+
 fn main() {
     ref_box_dyn();
     box_box_trait();
+    vtable_ptr_eq();
 }
diff --git a/src/tools/miri/tests/pass/function_pointers.rs b/src/tools/miri/tests/pass/function_pointers.rs
index 2aa3ebf2dd0..a5c4bc5e0d9 100644
--- a/src/tools/miri/tests/pass/function_pointers.rs
+++ b/src/tools/miri/tests/pass/function_pointers.rs
@@ -82,7 +82,8 @@ fn main() {
     assert!(return_fn_ptr(i) == i);
     assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32);
     // Miri gives different addresses to different reifications of a generic function.
-    assert!(return_fn_ptr(f) != f);
+    // at least if we try often enough.
+    assert!((0..256).any(|_| return_fn_ptr(f) != f));
     // However, if we only turn `f` into a function pointer and use that pointer,
     // it is equal to itself.
     let f2 = f as fn() -> i32;
diff --git a/src/tools/miri/tests/pass/rc.rs b/src/tools/miri/tests/pass/rc.rs
index 6dd1b3aff9e..b1470dabc26 100644
--- a/src/tools/miri/tests/pass/rc.rs
+++ b/src/tools/miri/tests/pass/rc.rs
@@ -75,7 +75,8 @@ fn rc_fat_ptr_eq() {
     let p = Rc::new(1) as Rc<dyn Debug>;
     let a: *const dyn Debug = &*p;
     let r = Rc::into_raw(p);
-    assert!(a == r);
+    // Only compare the pointer parts, as the vtable might differ.
+    assert!(a as *const () == r as *const ());
     drop(unsafe { Rc::from_raw(r) });
 }