about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ralf Jung <post@ralfj.de> 2025-07-16 11:00:19 +0000
committer GitHub <noreply@github.com> 2025-07-16 11:00:19 +0000
commit fe090dbb831fb32fb3fceb91f3b293a62509f2c5 (patch)
tree ab77402ff6cfed70ea69a14590e167726b3aafb3
parent ed212ffece310d319f965a44f6054800c4f90f52 (diff)
parent a7818abc141a072ff069ed6c43ebbe56eade9c23 (diff)
download rust-fe090dbb831fb32fb3fceb91f3b293a62509f2c5.tar.gz
rust-fe090dbb831fb32fb3fceb91f3b293a62509f2c5.zip
Merge pull request #4459 from ibraheemdev/ibraheem/global-ctor
Add support for global constructors (i.e. life before main)
-rw-r--r-- src/tools/miri/src/concurrency/thread.rs 2
-rw-r--r-- src/tools/miri/src/eval.rs 84
-rw-r--r-- src/tools/miri/src/helpers.rs 9
-rw-r--r-- src/tools/miri/src/shims/global_ctor.rs 98
-rw-r--r-- src/tools/miri/src/shims/mod.rs 1
-rw-r--r-- src/tools/miri/src/shims/tls.rs 2
-rw-r--r-- src/tools/miri/tests/pass/alloc-access-tracking.rs 4
-rw-r--r-- src/tools/miri/tests/pass/shims/ctor.rs 46
8 files changed, 216 insertions, 30 deletions
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index abfee0ee874..878afdf2517 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -677,6 +677,8 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
     fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> {
         let this = self.eval_context_mut();
         // Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling.
+        // FIXME(GenMC): Thread-local destructors *are* user code, so this is odd. Also now that we
+        // support pre-main constructors, it can get called there as well.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
             let thread_id = this.active_thread();
             genmc_ctx.handle_thread_stack_empty(thread_id);
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index 425a136dfa5..be6404f64e8 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -11,14 +11,14 @@ use rustc_abi::ExternAbi;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_hir::def::Namespace;
 use rustc_hir::def_id::DefId;
-use rustc_middle::ty::layout::LayoutCx;
+use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx};
 use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_session::config::EntryFnType;
 
 use crate::concurrency::GenmcCtx;
 use crate::concurrency::thread::TlsAllocAction;
 use crate::diagnostics::report_leaks;
-use crate::shims::tls;
+use crate::shims::{global_ctor, tls};
 use crate::*;
 
 #[derive(Copy, Clone, Debug)]
@@ -216,9 +216,17 @@ impl Default for MiriConfig {
 }
 
 /// The state of the main thread. Implementation detail of `on_main_stack_empty`.
-#[derive(Default, Debug)]
+#[derive(Debug)]
 enum MainThreadState<'tcx> {
-    #[default]
+    GlobalCtors {
+        ctor_state: global_ctor::GlobalCtorState<'tcx>,
+        /// The main function to call.
+        entry_id: DefId,
+        entry_type: MiriEntryFnType,
+        /// Arguments passed to `main`.
+        argc: ImmTy<'tcx>,
+        argv: ImmTy<'tcx>,
+    },
     Running,
     TlsDtors(tls::TlsDtorsState<'tcx>),
     Yield {
@@ -234,6 +242,15 @@ impl<'tcx> MainThreadState<'tcx> {
     ) -> InterpResult<'tcx, Poll<()>> {
         use MainThreadState::*;
         match self {
+            GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => {
+                match ctor_state.on_stack_empty(this)? {
+                    Poll::Pending => {} // just keep going
+                    Poll::Ready(()) => {
+                        call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?;
+                        *self = Running;
+                    }
+                }
+            }
             Running => {
                 *self = TlsDtors(Default::default());
             }
@@ -309,13 +326,6 @@ pub fn create_ecx<'tcx>(
         MiriMachine::new(config, layout_cx, genmc_ctx),
     );
 
-    // Some parts of initialization require a full `InterpCx`.
-    MiriMachine::late_init(&mut ecx, config, {
-        let mut state = MainThreadState::default();
-        // Cannot capture anything GC-relevant here.
-        Box::new(move |m| state.on_main_stack_empty(m))
-    })?;
-
     // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
     let sentinel =
         helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
@@ -326,15 +336,9 @@ pub fn create_ecx<'tcx>(
         );
     }
 
-    // Setup first stack frame.
-    let entry_instance = ty::Instance::mono(tcx, entry_id);
-
-    // First argument is constructed later, because it's skipped for `miri_start.`
-
-    // Second argument (argc): length of `config.args`.
+    // Compute argc and argv from `config.args`.
     let argc =
         ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize);
-    // Third argument (`argv`): created from `config.args`.
     let argv = {
         // Put each argument in memory, collect pointers.
         let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len());
@@ -359,7 +363,7 @@ pub fn create_ecx<'tcx>(
             ecx.write_immediate(arg, &place)?;
         }
         ecx.mark_immutable(&argvs_place);
-        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`.
+        // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`, and for the GC to see them.
         {
             let argc_place =
                 ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
@@ -374,7 +378,7 @@ pub fn create_ecx<'tcx>(
             ecx.machine.argv = Some(argv_place.ptr());
         }
         // Store command line as UTF-16 for Windows `GetCommandLineW`.
-        {
+        if tcx.sess.target.os == "windows" {
             // Construct a command string with all the arguments.
             let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
 
@@ -395,11 +399,43 @@ pub fn create_ecx<'tcx>(
         ImmTy::from_immediate(imm, layout)
     };
 
+    // Some parts of initialization require a full `InterpCx`.
+    MiriMachine::late_init(&mut ecx, config, {
+        let mut main_thread_state = MainThreadState::GlobalCtors {
+            entry_id,
+            entry_type,
+            argc,
+            argv,
+            ctor_state: global_ctor::GlobalCtorState::default(),
+        };
+
+        // Cannot capture anything GC-relevant here.
+        // `argc` and `argv` *are* GC-relevant, but they also get stored in `machine.argc` and
+        // `machine.argv` so we are good.
+        Box::new(move |m| main_thread_state.on_main_stack_empty(m))
+    })?;
+
+    interp_ok(ecx)
+}
+
+// Call the entry function.
+fn call_main<'tcx>(
+    ecx: &mut MiriInterpCx<'tcx>,
+    entry_id: DefId,
+    entry_type: MiriEntryFnType,
+    argc: ImmTy<'tcx>,
+    argv: ImmTy<'tcx>,
+) -> InterpResult<'tcx, ()> {
+    let tcx = ecx.tcx();
+
+    // Setup first stack frame.
+    let entry_instance = ty::Instance::mono(tcx, entry_id);
+
     // Return place (in static memory so that it does not count as leak).
     let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
     ecx.machine.main_fn_ret_place = Some(ret_place.clone());
-    // Call start function.
 
+    // Call start function.
     match entry_type {
         MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
             let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
@@ -409,7 +445,7 @@ pub fn create_ecx<'tcx>(
             let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
             let start_instance = ty::Instance::try_resolve(
                 tcx,
-                typing_env,
+                ecx.typing_env(),
                 start_id,
                 tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
             )
@@ -427,7 +463,7 @@ pub fn create_ecx<'tcx>(
                 ExternAbi::Rust,
                 &[
                     ImmTy::from_scalar(
-                        Scalar::from_pointer(main_ptr, &ecx),
+                        Scalar::from_pointer(main_ptr, ecx),
                         // FIXME use a proper fn ptr type
                         ecx.machine.layouts.const_raw_ptr,
                     ),
@@ -450,7 +486,7 @@ pub fn create_ecx<'tcx>(
         }
     }
 
-    interp_ok(ecx)
+    interp_ok(())
 }
 
 /// Evaluates the entry function specified by `entry_id`.
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 216b7b1e4bb..35d3e7e647c 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -1235,8 +1235,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(())
     }
 
-    /// Lookup an array of immediates stored as a linker section of name `name`.
-    fn lookup_link_section(&mut self, name: &str) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
+    /// Lookup an array of immediates from any linker sections matching the provided predicate.
+    fn lookup_link_section(
+        &mut self,
+        include_name: impl Fn(&str) -> bool,
+    ) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
         let this = self.eval_context_mut();
         let tcx = this.tcx.tcx;
 
@@ -1247,7 +1250,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             let Some(link_section) = attrs.link_section else {
                 return interp_ok(());
             };
-            if link_section.as_str() == name {
+            if include_name(link_section.as_str()) {
                 let instance = ty::Instance::mono(tcx, def_id);
                 let const_val = this.eval_global(instance).unwrap_or_else(|err| {
                     panic!(
diff --git a/src/tools/miri/src/shims/global_ctor.rs b/src/tools/miri/src/shims/global_ctor.rs
new file mode 100644
index 00000000000..c56251bbe63
--- /dev/null
+++ b/src/tools/miri/src/shims/global_ctor.rs
@@ -0,0 +1,98 @@
+//! Implement global constructors.
+
+use std::task::Poll;
+
+use rustc_abi::ExternAbi;
+use rustc_target::spec::BinaryFormat;
+
+use crate::*;
+
+#[derive(Debug, Default)]
+pub struct GlobalCtorState<'tcx>(GlobalCtorStatePriv<'tcx>);
+
+#[derive(Debug, Default)]
+enum GlobalCtorStatePriv<'tcx> {
+    #[default]
+    Init,
+    /// The list of constructor functions that we still have to call.
+    Ctors(Vec<ImmTy<'tcx>>),
+    Done,
+}
+
+impl<'tcx> GlobalCtorState<'tcx> {
+    pub fn on_stack_empty(
+        &mut self,
+        this: &mut MiriInterpCx<'tcx>,
+    ) -> InterpResult<'tcx, Poll<()>> {
+        use GlobalCtorStatePriv::*;
+        let new_state = 'new_state: {
+            match &mut self.0 {
+                Init => {
+                    let this = this.eval_context_mut();
+
+                    // Lookup constructors from the relevant magic link section.
+                    let ctors = match this.tcx.sess.target.binary_format {
+                        // Read the CRT library section on Windows.
+                        BinaryFormat::Coff =>
+                            this.lookup_link_section(|section| section == ".CRT$XCU")?,
+
+                        // Read the `__mod_init_func` section on macOS.
+                        BinaryFormat::MachO =>
+                            this.lookup_link_section(|section| {
+                                let mut parts = section.splitn(3, ',');
+                                let (segment_name, section_name, section_type) =
+                                    (parts.next(), parts.next(), parts.next());
+
+                                segment_name == Some("__DATA")
+                                    && section_name == Some("__mod_init_func")
+                                    // The `mod_init_funcs` directive ensures that the
+                                    // `S_MOD_INIT_FUNC_POINTERS` flag is set on the section. LLVM
+                                    // adds this automatically so we currently do not require it.
+                                    // FIXME: is this guaranteed LLVM behavior? If not, we shouldn't
+                                    // implicitly add it here. Also see
+                                    // <https://github.com/rust-lang/miri/pull/4459#discussion_r2200115629>.
+                                    && matches!(section_type, None | Some("mod_init_funcs"))
+                            })?,
+
+                        // Read the standard `.init_array` section on platforms that use ELF, or WASM,
+                        // which supports the same linker directive.
+                        // FIXME: Add support for `.init_array.N` and `.ctors`?
+                        BinaryFormat::Elf | BinaryFormat::Wasm =>
+                            this.lookup_link_section(|section| section == ".init_array")?,
+
+                        // Other platforms have no global ctor support.
+                        _ => break 'new_state Done,
+                    };
+
+                    break 'new_state Ctors(ctors);
+                }
+                Ctors(ctors) => {
+                    if let Some(ctor) = ctors.pop() {
+                        let this = this.eval_context_mut();
+
+                        let ctor = ctor.to_scalar().to_pointer(this)?;
+                        let thread_callback = this.get_ptr_fn(ctor)?.as_instance()?;
+
+                        // The signature of this function is `unsafe extern "C" fn()`.
+                        this.call_function(
+                            thread_callback,
+                            ExternAbi::C { unwind: false },
+                            &[],
+                            None,
+                            ReturnContinuation::Stop { cleanup: true },
+                        )?;
+
+                        return interp_ok(Poll::Pending); // we stay in this state (but `ctors` got shorter)
+                    }
+
+                    // No more constructors to run.
+                    break 'new_state Done;
+                }
+                Done => return interp_ok(Poll::Ready(())),
+            }
+        };
+
+        self.0 = new_state;
+        interp_ok(Poll::Pending)
+    }
+}
diff --git a/src/tools/miri/src/shims/mod.rs b/src/tools/miri/src/shims/mod.rs
index b08ab101e94..75540f6f150 100644
--- a/src/tools/miri/src/shims/mod.rs
+++ b/src/tools/miri/src/shims/mod.rs
@@ -14,6 +14,7 @@ mod x86;
 pub mod env;
 pub mod extern_static;
 pub mod foreign_items;
+pub mod global_ctor;
 pub mod io_error;
 pub mod os_str;
 pub mod panic;
diff --git a/src/tools/miri/src/shims/tls.rs b/src/tools/miri/src/shims/tls.rs
index 7182637437a..1200029692d 100644
--- a/src/tools/miri/src/shims/tls.rs
+++ b/src/tools/miri/src/shims/tls.rs
@@ -302,7 +302,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
         // Windows has a special magic linker section that is run on certain events.
         // We don't support most of that, but just enough to make thread-local dtors in `std` work.
-        interp_ok(this.lookup_link_section(".CRT$XLB")?)
+        interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?)
     }
 
     fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> {
diff --git a/src/tools/miri/tests/pass/alloc-access-tracking.rs b/src/tools/miri/tests/pass/alloc-access-tracking.rs
index 0e88951dc43..9eba0ca171b 100644
--- a/src/tools/miri/tests/pass/alloc-access-tracking.rs
+++ b/src/tools/miri/tests/pass/alloc-access-tracking.rs
@@ -1,7 +1,7 @@
 #![no_std]
 #![no_main]
-//@compile-flags: -Zmiri-track-alloc-id=20 -Zmiri-track-alloc-accesses -Cpanic=abort
-//@normalize-stderr-test: "id 20" -> "id $$ALLOC"
+//@compile-flags: -Zmiri-track-alloc-id=19 -Zmiri-track-alloc-accesses -Cpanic=abort
+//@normalize-stderr-test: "id 19" -> "id $$ALLOC"
 //@only-target: linux # alloc IDs differ between OSes (due to extern static allocations)
 
 extern "Rust" {
diff --git a/src/tools/miri/tests/pass/shims/ctor.rs b/src/tools/miri/tests/pass/shims/ctor.rs
new file mode 100644
index 00000000000..b997d2386b8
--- /dev/null
+++ b/src/tools/miri/tests/pass/shims/ctor.rs
@@ -0,0 +1,46 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+static COUNT: AtomicUsize = AtomicUsize::new(0);
+
+unsafe extern "C" fn ctor() {
+    COUNT.fetch_add(1, Ordering::Relaxed);
+}
+
+#[rustfmt::skip]
+macro_rules! ctor {
+    ($ident:ident = $ctor:ident) => {
+        #[cfg_attr(
+            all(any(
+                target_os = "linux",
+                target_os = "android",
+                target_os = "dragonfly",
+                target_os = "freebsd",
+                target_os = "haiku",
+                target_os = "illumos",
+                target_os = "netbsd",
+                target_os = "openbsd",
+                target_os = "solaris",
+                target_os = "none",
+                target_family = "wasm",
+            )),
+            link_section = ".init_array"
+        )]
+        #[cfg_attr(windows, link_section = ".CRT$XCU")]
+        #[cfg_attr(
+            any(target_os = "macos", target_os = "ios"),
+            // We do not set the `mod_init_funcs` flag here since ctor/inventory also do not do
+            // that. See <https://github.com/rust-lang/miri/pull/4459#discussion_r2200115629>.
+            link_section = "__DATA,__mod_init_func"
+        )]
+        #[used]
+        static $ident: unsafe extern "C" fn() = $ctor;
+    };
+}
+
+ctor! { CTOR1 = ctor }
+ctor! { CTOR2 = ctor }
+ctor! { CTOR3 = ctor }
+
+fn main() {
+    assert_eq!(COUNT.load(Ordering::Relaxed), 3, "ctors did not run");
+}