diff options
| author | bors <bors@rust-lang.org> | 2024-08-13 04:32:34 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2024-08-13 04:32:34 +0000 |
| commit | 591ecb88dffdb0f233e2fae74fd3d7c81d65ff0c (patch) | |
| tree | cd2dcd52c2c7eb4220eb42027136efad1f4976c4 /src | |
| parent | e9e27ab0cf3759e420eeaa6bab7c7d454d9c10cd (diff) | |
| parent | 4763d12207561037847cc7dea4b695f3c129f1d7 (diff) | |
| download | rust-591ecb88dffdb0f233e2fae74fd3d7c81d65ff0c.tar.gz rust-591ecb88dffdb0f233e2fae74fd3d7c81d65ff0c.zip | |
Auto merge of #128742 - RalfJung:miri-vtable-uniqueness, r=saethlin
miri: make vtable addresses not globally unique Miri currently gives vtables a unique global address. That's not actually matching reality though. So this PR enables Miri to generate different addresses for the same type-trait pair. To avoid generating an unbounded number of `AllocId` (and consuming unbounded amounts of memory), we use the "salt" technique that we also already use for giving constants non-unique addresses: the cache is keyed on a "salt" value n top of the actually relevant key, and Miri picks a random salt (currently in the range `0..16`) each time it needs to choose an `AllocId` for one of these globals -- that means we'll get up to 16 different addresses for each vtable. The salt scheme is integrated into the global allocation deduplication logic in `tcx`, and also used for functions and string literals. (So this also fixes the problem that casting the same function to a fn ptr over and over will consume unbounded memory.) r? `@saethlin` Fixes https://github.com/rust-lang/miri/issues/3737
Diffstat (limited to 'src')
| -rw-r--r-- | src/tools/miri/src/lib.rs | 1 | ||||
| -rw-r--r-- | src/tools/miri/src/machine.rs | 53 | ||||
| -rw-r--r-- | src/tools/miri/tests/pass/dyn-traits.rs | 10 | ||||
| -rw-r--r-- | src/tools/miri/tests/pass/function_pointers.rs | 3 | ||||
| -rw-r--r-- | src/tools/miri/tests/pass/rc.rs | 3 |
5 files changed, 63 insertions, 7 deletions
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 2b3ae6df5de..966d38508f6 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -56,6 +56,7 @@ extern crate either; extern crate tracing; // The rustc crates we need +extern crate rustc_attr; extern crate rustc_apfloat; extern crate rustc_ast; extern crate rustc_const_eval; diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 94598e7d2e3..754f8cf1381 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -11,6 +11,7 @@ use rand::rngs::StdRng; use rand::Rng; use rand::SeedableRng; +use rustc_attr::InlineAttr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; #[allow(unused)] use rustc_data_structures::static_assert_size; @@ -23,6 +24,7 @@ use rustc_middle::{ Instance, Ty, TyCtxt, }, }; +use rustc_session::config::InliningThreshold; use rustc_span::def_id::{CrateNum, DefId}; use rustc_span::{Span, SpanData, Symbol}; use rustc_target::abi::{Align, Size}; @@ -47,10 +49,10 @@ pub const SIGRTMIN: i32 = 34; /// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`) pub const SIGRTMAX: i32 = 42; -/// Each const has multiple addresses, but only this many. Since const allocations are never -/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would -/// produce unbounded memory usage. -const ADDRS_PER_CONST: usize = 16; +/// Each anonymous global (constant, vtable, function pointer, ...) has multiple addresses, but only +/// this many. Since const allocations are never deallocated, choosing a new [`AllocId`] and thus +/// base address for each evaluation would produce unbounded memory usage. +const ADDRS_PER_ANON_GLOBAL: usize = 32; /// Extra data stored with each stack frame pub struct FrameExtra<'tcx> { @@ -1372,7 +1374,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { catch_unwind: None, timing, is_user_relevant: ecx.machine.is_user_relevant(&frame), - salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_CONST, + salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL, }; Ok(frame.with_extra(extra)) @@ -1518,4 +1520,45 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { Entry::Occupied(oe) => Ok(oe.get().clone()), } } + + fn get_global_alloc_salt( + ecx: &InterpCx<'tcx, Self>, + instance: Option<ty::Instance<'tcx>>, + ) -> usize { + let unique = if let Some(instance) = instance { + // Functions cannot be identified by pointers, as asm-equal functions can get + // deduplicated by the linker (we set the "unnamed_addr" attribute for LLVM) and + // functions can be duplicated across crates. We thus generate a new `AllocId` for every + // mention of a function. This means that `main as fn() == main as fn()` is false, while + // `let x = main as fn(); x == x` is true. However, as a quality-of-life feature it can + // be useful to identify certain functions uniquely, e.g. for backtraces. So we identify + // whether codegen will actually emit duplicate functions. It does that when they have + // non-lifetime generics, or when they can be inlined. All other functions are given a + // unique address. This is not a stable guarantee! The `inline` attribute is a hint and + // cannot be relied upon for anything. But if we don't do this, the + // `__rust_begin_short_backtrace`/`__rust_end_short_backtrace` logic breaks and panic + // backtraces look terrible. + let is_generic = instance + .args + .into_iter() + .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_))); + let can_be_inlined = matches!( + ecx.tcx.sess.opts.unstable_opts.cross_crate_inline_threshold, + InliningThreshold::Always + ) || !matches!( + ecx.tcx.codegen_fn_attrs(instance.def_id()).inline, + InlineAttr::Never + ); + !is_generic && !can_be_inlined + } else { + // Non-functions are never unique. + false + }; + // Always use the same salt if the allocation is unique. + if unique { + CTFE_ALLOC_SALT + } else { + ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL + } + } } diff --git a/src/tools/miri/tests/pass/dyn-traits.rs b/src/tools/miri/tests/pass/dyn-traits.rs index 908d521a0d8..f6220120968 100644 --- a/src/tools/miri/tests/pass/dyn-traits.rs +++ b/src/tools/miri/tests/pass/dyn-traits.rs @@ -141,7 +141,17 @@ fn unsized_dyn_autoderef() { } */ +fn vtable_ptr_eq() { + use std::{fmt, ptr}; + + // We don't always get the same vtable when casting this to a wide pointer. + let x = &2; + let x_wide = x as &dyn fmt::Display; + assert!((0..256).any(|_| !ptr::eq(x as &dyn fmt::Display, x_wide))); +} + fn main() { ref_box_dyn(); box_box_trait(); + vtable_ptr_eq(); } diff --git a/src/tools/miri/tests/pass/function_pointers.rs b/src/tools/miri/tests/pass/function_pointers.rs index 2aa3ebf2dd0..a5c4bc5e0d9 100644 --- a/src/tools/miri/tests/pass/function_pointers.rs +++ b/src/tools/miri/tests/pass/function_pointers.rs @@ -82,7 +82,8 @@ fn main() { assert!(return_fn_ptr(i) == i); assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32); // Miri gives different addresses to different reifications of a generic function. - assert!(return_fn_ptr(f) != f); + // at least if we try often enough. + assert!((0..256).any(|_| return_fn_ptr(f) != f)); // However, if we only turn `f` into a function pointer and use that pointer, // it is equal to itself. let f2 = f as fn() -> i32; diff --git a/src/tools/miri/tests/pass/rc.rs b/src/tools/miri/tests/pass/rc.rs index 6dd1b3aff9e..b1470dabc26 100644 --- a/src/tools/miri/tests/pass/rc.rs +++ b/src/tools/miri/tests/pass/rc.rs @@ -75,7 +75,8 @@ fn rc_fat_ptr_eq() { let p = Rc::new(1) as Rc<dyn Debug>; let a: *const dyn Debug = &*p; let r = Rc::into_raw(p); - assert!(a == r); + // Only compare the pointer parts, as the vtable might differ. + assert!(a as *const () == r as *const ()); drop(unsafe { Rc::from_raw(r) }); } |
