Diffstat (limited to 'library/std/src/sys/thread')
-rw-r--r--  library/std/src/sys/thread/hermit.rs       |  92
-rw-r--r--  library/std/src/sys/thread/mod.rs          | 152
-rw-r--r--  library/std/src/sys/thread/sgx.rs          | 131
-rw-r--r--  library/std/src/sys/thread/solid.rs        | 348
-rw-r--r--  library/std/src/sys/thread/teeos.rs        | 126
-rw-r--r--  library/std/src/sys/thread/uefi.rs         |  25
-rw-r--r--  library/std/src/sys/thread/unix.rs         | 908
-rw-r--r--  library/std/src/sys/thread/unsupported.rs  |  43
-rw-r--r--  library/std/src/sys/thread/wasip1.rs       | 185
-rw-r--r--  library/std/src/sys/thread/wasip2.rs       |  32
-rw-r--r--  library/std/src/sys/thread/wasm.rs         |  23
-rw-r--r--  library/std/src/sys/thread/windows.rs      | 140
-rw-r--r--  library/std/src/sys/thread/xous.rs         | 137
13 files changed, 2342 insertions(+), 0 deletions(-)
diff --git a/library/std/src/sys/thread/hermit.rs b/library/std/src/sys/thread/hermit.rs
new file mode 100644
index 00000000000..4d9f3b114c2
--- /dev/null
+++ b/library/std/src/sys/thread/hermit.rs
@@ -0,0 +1,92 @@
+use crate::num::NonZero;
+use crate::time::Duration;
+use crate::{io, ptr};
+
+pub type Tid = hermit_abi::Tid;
+
+pub struct Thread {
+    tid: Tid,
+}
+
+unsafe impl Send for Thread {}
+unsafe impl Sync for Thread {}
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20;
+
+impl Thread {
+    pub unsafe fn new_with_coreid(
+        stack: usize,
+        p: Box<dyn FnOnce()>,
+        core_id: isize,
+    ) -> io::Result<Thread> {
+        let p = Box::into_raw(Box::new(p));
+        let tid = unsafe {
+            hermit_abi::spawn2(
+                thread_start,
+                p.expose_provenance(),
+                hermit_abi::Priority::into(hermit_abi::NORMAL_PRIO),
+                stack,
+                core_id,
+            )
+        };
+
+        return if tid == 0 {
+            // The thread failed to start and as a result p was not consumed. Therefore, it is
+            // safe to reconstruct the box so that it gets deallocated.
+            unsafe {
+                drop(Box::from_raw(p));
+            }
+            Err(io::const_error!(io::ErrorKind::Uncategorized, "unable to create thread!"))
+        } else {
+            Ok(Thread { tid })
+        };
+
+        extern "C" fn thread_start(main: usize) {
+            unsafe {
+                // Finally, let's run some code.
+                Box::from_raw(ptr::with_exposed_provenance::<Box<dyn FnOnce()>>(main).cast_mut())();
+
+                // run all destructors
+                crate::sys::thread_local::destructors::run();
+                crate::rt::thread_cleanup();
+            }
+        }
+    }
+
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        unsafe {
+            Thread::new_with_coreid(stack, p, -1 /* = no specific core */)
+        }
+    }
+
+    pub fn join(self) {
+        unsafe {
+            let _ = hermit_abi::join(self.tid);
+        }
+    }
+}
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    unsafe { Ok(NonZero::new_unchecked(hermit_abi::available_parallelism())) }
+}
+
+#[inline]
+pub fn sleep(dur: Duration) {
+    let micros = dur.as_micros() + if dur.subsec_nanos() % 1_000 > 0 { 1 } else { 0 };
+    let micros = u64::try_from(micros).unwrap_or(u64::MAX);
+
+    unsafe {
+        hermit_abi::usleep(micros);
+    }
+}
+
+#[inline]
+pub fn yield_now() {
+    unsafe {
+        hermit_abi::yield_now();
+    }
+}
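
A note on the hermit spawn path above: `hermit_abi::spawn2` only accepts a plain `usize` argument, and `Box<dyn FnOnce()>` is a fat pointer, so the closure is boxed a second time to obtain a thin pointer whose address can be exposed. A minimal sketch of the same round trip, with a hypothetical `spawn_raw` standing in for the FFI call (it runs the entry function inline rather than on a new thread):

```rust
use std::ptr;

// Hypothetical stand-in for an FFI entry point that only takes a `usize`;
// a real API would run `entry` on a newly created thread.
fn spawn_raw(entry: extern "C" fn(usize), arg: usize) {
    entry(arg);
}

extern "C" fn entry(arg: usize) {
    // Recover the thin pointer, then take ownership of the boxed closure.
    let closure: Box<Box<dyn FnOnce()>> =
        unsafe { Box::from_raw(ptr::with_exposed_provenance_mut(arg)) };
    closure();
}

fn main() {
    let f: Box<dyn FnOnce()> = Box::new(|| println!("hello from the closure"));
    // Double-box: the outer box is a thin pointer that fits in a `usize`.
    let raw = Box::into_raw(Box::new(f));
    spawn_raw(entry, raw.expose_provenance());
}
```

On the error path the same raw pointer is reconstructed with `Box::from_raw` so the closure is deallocated, exactly as the hermit code does when `tid == 0`.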
diff --git a/library/std/src/sys/thread/mod.rs b/library/std/src/sys/thread/mod.rs
new file mode 100644
index 00000000000..6bb7fc1a20e
--- /dev/null
+++ b/library/std/src/sys/thread/mod.rs
@@ -0,0 +1,152 @@
+cfg_select! {
+    target_os = "hermit" => {
+        mod hermit;
+        pub use hermit::{Thread, available_parallelism, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{current_os_id, set_name};
+    }
+    all(target_vendor = "fortanix", target_env = "sgx") => {
+        mod sgx;
+        pub use sgx::{Thread, current_os_id, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+
+        // SGX should protect in-enclave data from outside attackers, so there
+        // must not be any data leakage to the OS, particularly no 1-1 mapping
+        // between SGX thread names and OS thread names. Hence `set_name` is
+        // intentionally a no-op.
+        //
+        // Note that the internally visible SGX thread name is already provided
+        // by the platform-agnostic Rust thread code. This can be observed in
+        // the [`std::thread::tests::test_named_thread`] test, which succeeds
+        // as-is with the SGX target.
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{available_parallelism, set_name};
+    }
+    target_os = "solid_asp3" => {
+        mod solid;
+        pub use solid::{Thread, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{available_parallelism, current_os_id, set_name};
+    }
+    target_os = "teeos" => {
+        mod teeos;
+        pub use teeos::{Thread, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{available_parallelism, current_os_id, set_name};
+    }
+    target_os = "uefi" => {
+        mod uefi;
+        pub use uefi::{available_parallelism, sleep};
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{Thread, current_os_id, set_name, yield_now, DEFAULT_MIN_STACK_SIZE};
+    }
+    target_family = "unix" => {
+        mod unix;
+        pub use unix::{Thread, available_parallelism, current_os_id, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+        #[cfg(not(any(
+            target_env = "newlib",
+            target_os = "l4re",
+            target_os = "emscripten",
+            target_os = "redox",
+            target_os = "hurd",
+            target_os = "aix",
+        )))]
+        pub use unix::set_name;
+        #[cfg(any(
+            target_os = "freebsd",
+            target_os = "netbsd",
+            target_os = "linux",
+            target_os = "android",
+            target_os = "solaris",
+            target_os = "illumos",
+            target_os = "dragonfly",
+            target_os = "hurd",
+            target_os = "fuchsia",
+            target_os = "vxworks",
+        ))]
+        pub use unix::sleep_until;
+        #[expect(dead_code)]
+        mod unsupported;
+        #[cfg(any(
+            target_env = "newlib",
+            target_os = "l4re",
+            target_os = "emscripten",
+            target_os = "redox",
+            target_os = "hurd",
+            target_os = "aix",
+        ))]
+        pub use unsupported::set_name;
+    }
+    all(target_os = "wasi", target_env = "p1") => {
+        mod wasip1;
+        pub use wasip1::{DEFAULT_MIN_STACK_SIZE, sleep, yield_now};
+        #[cfg(target_feature = "atomics")]
+        pub use wasip1::{Thread, available_parallelism};
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{current_os_id, set_name};
+        #[cfg(not(target_feature = "atomics"))]
+        pub use unsupported::{Thread, available_parallelism};
+    }
+    all(target_os = "wasi", target_env = "p2") => {
+        mod wasip2;
+        pub use wasip2::{sleep, sleep_until};
+        #[expect(dead_code)]
+        mod unsupported;
+        // Note that unlike WASIp1 even if the wasm `atomics` feature is enabled
+        // there is no support for threads, not even experimentally, not even in
+        // wasi-libc. Thus this is unconditionally unsupported.
+        pub use unsupported::{Thread, available_parallelism, current_os_id, set_name, yield_now, DEFAULT_MIN_STACK_SIZE};
+    }
+    all(target_family = "wasm", target_feature = "atomics") => {
+        mod wasm;
+        pub use wasm::sleep;
+
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{Thread, available_parallelism, current_os_id, set_name, yield_now, DEFAULT_MIN_STACK_SIZE};
+    }
+    target_os = "windows" => {
+        mod windows;
+        pub use windows::{Thread, available_parallelism, current_os_id, set_name, set_name_wide, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+    }
+    target_os = "xous" => {
+        mod xous;
+        pub use xous::{Thread, available_parallelism, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+
+        #[expect(dead_code)]
+        mod unsupported;
+        pub use unsupported::{current_os_id, set_name};
+    }
+    _ => {
+        mod unsupported;
+        pub use unsupported::{Thread, available_parallelism, current_os_id, set_name, sleep, yield_now, DEFAULT_MIN_STACK_SIZE};
+    }
+}
+
+#[cfg(not(any(
+    target_os = "freebsd",
+    target_os = "netbsd",
+    target_os = "linux",
+    target_os = "android",
+    target_os = "solaris",
+    target_os = "illumos",
+    target_os = "dragonfly",
+    target_os = "hurd",
+    target_os = "fuchsia",
+    target_os = "vxworks",
+    all(target_os = "wasi", target_env = "p2"),
+)))]
+pub fn sleep_until(deadline: crate::time::Instant) {
+    use crate::time::Instant;
+
+    let now = Instant::now();
+
+    if let Some(delay) = deadline.checked_duration_since(now) {
+        sleep(delay);
+    }
+}
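
The generic `sleep_until` fallback above reduces a deadline to a relative delay; `Instant::checked_duration_since` returns `None` for deadlines already in the past, in which case no sleep happens at all. A sketch of the same pattern against the public API:

```rust
use std::thread;
use std::time::{Duration, Instant};

fn main() {
    let deadline = Instant::now() + Duration::from_millis(50);

    // A deadline already in the past yields `None`, so the sleep is skipped.
    if let Some(delay) = deadline.checked_duration_since(Instant::now()) {
        thread::sleep(delay);
    }
    assert!(Instant::now() >= deadline);
}
```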
diff --git a/library/std/src/sys/thread/sgx.rs b/library/std/src/sys/thread/sgx.rs
new file mode 100644
index 00000000000..f20ef7d86b9
--- /dev/null
+++ b/library/std/src/sys/thread/sgx.rs
@@ -0,0 +1,131 @@
+#![cfg_attr(test, allow(dead_code))] // why is this necessary?
+
+use crate::io;
+use crate::sys::pal::abi::{thread, usercalls};
+use crate::time::Duration;
+
+pub struct Thread(task_queue::JoinHandle);
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 4096;
+
+pub use self::task_queue::JoinNotifier;
+
+mod task_queue {
+    use super::wait_notify;
+    use crate::sync::{Mutex, MutexGuard};
+
+    pub type JoinHandle = wait_notify::Waiter;
+
+    pub struct JoinNotifier(Option<wait_notify::Notifier>);
+
+    impl Drop for JoinNotifier {
+        fn drop(&mut self) {
+            self.0.take().unwrap().notify();
+        }
+    }
+
+    pub(super) struct Task {
+        p: Box<dyn FnOnce() + Send>,
+        done: JoinNotifier,
+    }
+
+    impl Task {
+        pub(super) fn new(p: Box<dyn FnOnce() + Send>) -> (Task, JoinHandle) {
+            let (done, recv) = wait_notify::new();
+            let done = JoinNotifier(Some(done));
+            (Task { p, done }, recv)
+        }
+
+        pub(super) fn run(self) -> JoinNotifier {
+            (self.p)();
+            self.done
+        }
+    }
+
+    // Specifying linkage/symbol name is solely to ensure a single instance between this crate and its unit tests
+    #[cfg_attr(test, linkage = "available_externally")]
+    #[unsafe(export_name = "_ZN16__rust_internals3std3sys3pal3sgx6thread10TASK_QUEUEE")]
+    static TASK_QUEUE: Mutex<Vec<Task>> = Mutex::new(Vec::new());
+
+    pub(super) fn lock() -> MutexGuard<'static, Vec<Task>> {
+        TASK_QUEUE.lock().unwrap()
+    }
+}
+
+/// This module provides a synchronization primitive that does not use thread
+/// local variables. This is needed for signaling that a thread has finished
+/// execution. The signal is sent once all TLS destructors have finished at
+/// which point no new thread locals should be created.
+pub mod wait_notify {
+    use crate::pin::Pin;
+    use crate::sync::Arc;
+    use crate::sys::sync::Parker;
+
+    pub struct Notifier(Arc<Parker>);
+
+    impl Notifier {
+        /// Notify the waiter. The waiter is either notified right away (if
+        /// currently blocked in `Waiter::wait()`) or later when it calls the
+        /// `Waiter::wait()` method.
+        pub fn notify(self) {
+            Pin::new(&*self.0).unpark()
+        }
+    }
+
+    pub struct Waiter(Arc<Parker>);
+
+    impl Waiter {
+        /// Wait for a notification. If `Notifier::notify()` has already been
+        /// called, this will return immediately, otherwise the current thread
+        /// is blocked until notified.
+        pub fn wait(self) {
+            // SAFETY:
+            // This is only ever called on one thread.
+            unsafe { Pin::new(&*self.0).park() }
+        }
+    }
+
+    pub fn new() -> (Notifier, Waiter) {
+        let inner = Arc::new(Parker::new());
+        (Notifier(inner.clone()), Waiter(inner))
+    }
+}
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    pub unsafe fn new(
+        _stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce() + Send>,
+    ) -> io::Result<Thread> {
+        let mut queue_lock = task_queue::lock();
+        unsafe { usercalls::launch_thread()? };
+        let (task, handle) = task_queue::Task::new(p);
+        queue_lock.push(task);
+        Ok(Thread(handle))
+    }
+
+    pub(crate) fn entry() -> JoinNotifier {
+        let mut pending_tasks = task_queue::lock();
+        let task = rtunwrap!(Some, pending_tasks.pop());
+        drop(pending_tasks); // make sure to not hold the task queue lock longer than necessary
+        task.run()
+    }
+
+    pub fn join(self) {
+        self.0.wait();
+    }
+}
+
+pub fn current_os_id() -> Option<u64> {
+    Some(thread::current().addr().get() as u64)
+}
+
+pub fn sleep(dur: Duration) {
+    usercalls::wait_timeout(0, dur, || true);
+}
+
+pub fn yield_now() {
+    let wait_error = rtunwrap!(Err, usercalls::wait(0, usercalls::raw::WAIT_NO));
+    rtassert!(wait_error.kind() == io::ErrorKind::WouldBlock);
+}
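
The `wait_notify` primitive above is a one-shot handshake over a shared `Parker`: if `notify` runs first, the token is stored and the later `wait` returns immediately. The public analogue is `thread::park`/`Thread::unpark`; a rough sketch (note that public `park` may also wake spuriously, so real code pairs it with a flag, which the internal `Parker` handles for the SGX code):

```rust
use std::sync::mpsc::channel;
use std::thread;

fn main() {
    let (tx, rx) = channel();

    let waiter = thread::spawn(move || {
        // Hand our handle over, then block until the token arrives.
        tx.send(thread::current()).unwrap();
        thread::park(); // returns immediately if `unpark` already ran
    });

    // One-shot notify, like `Notifier::notify(self)` above.
    rx.recv().unwrap().unpark();
    waiter.join().unwrap();
}
```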
diff --git a/library/std/src/sys/thread/solid.rs b/library/std/src/sys/thread/solid.rs
new file mode 100644
index 00000000000..46a84faa802
--- /dev/null
+++ b/library/std/src/sys/thread/solid.rs
@@ -0,0 +1,348 @@
+//! Thread implementation backed by μITRON tasks. Assumes `acre_tsk` and
+//! `exd_tsk` are available.
+
+use crate::cell::UnsafeCell;
+use crate::mem::ManuallyDrop;
+use crate::ptr::NonNull;
+use crate::sync::atomic::{Atomic, AtomicUsize, Ordering};
+use crate::sys::pal::itron::error::{ItronError, expect_success, expect_success_aborting};
+use crate::sys::pal::itron::time::dur2reltims;
+use crate::sys::pal::itron::{abi, task};
+use crate::time::Duration;
+use crate::{hint, io};
+
+pub struct Thread {
+    p_inner: NonNull<ThreadInner>,
+
+    /// The ID of the underlying task.
+    task: abi::ID,
+}
+
+// Safety: There's nothing in `Thread` that ties it to the original creator. It
+//         can be dropped by any threads.
+unsafe impl Send for Thread {}
+// Safety: `Thread` provides no methods that take `&self`.
+unsafe impl Sync for Thread {}
+
+/// State data shared between a parent thread and child thread. It's dropped on
+/// a transition to one of the final states.
+struct ThreadInner {
+    /// This field is used on thread creation to pass a closure from
+    /// `Thread::new` to the created task.
+    start: UnsafeCell<ManuallyDrop<Box<dyn FnOnce()>>>,
+
+    /// A state machine. Each transition is annotated with `[...]` in the
+    /// source code.
+    ///
+    /// ```text
+    ///
+    ///    <P>: parent, <C>: child, (?): don't-care
+    ///
+    ///       DETACHED (-1)  -------------------->  EXITED (?)
+    ///                        <C>finish/exd_tsk
+    ///          ^
+    ///          |
+    ///          | <P>detach
+    ///          |
+    ///
+    ///       INIT (0)  ----------------------->  FINISHED (-1)
+    ///                        <C>finish
+    ///          |                                    |
+    ///          | <P>join/slp_tsk                    | <P>join/del_tsk
+    ///          |                                    | <P>detach/del_tsk
+    ///          v                                    v
+    ///
+    ///       JOINING                              JOINED (?)
+    ///     (parent_tid)
+    ///                                            ^
+    ///             \                             /
+    ///              \  <C>finish/wup_tsk        / <P>slp_tsk-complete/ter_tsk
+    ///               \                         /                      & del_tsk
+    ///                \                       /
+    ///                 '--> JOIN_FINALIZE ---'
+    ///                          (-1)
+    ///
+    /// ```
+    lifecycle: Atomic<usize>,
+}
+
+// Safety: The only `!Sync` field, `ThreadInner::start`, is only touched by
+//         the task represented by `ThreadInner`.
+unsafe impl Sync for ThreadInner {}
+
+const LIFECYCLE_INIT: usize = 0;
+const LIFECYCLE_FINISHED: usize = usize::MAX;
+const LIFECYCLE_DETACHED: usize = usize::MAX;
+const LIFECYCLE_JOIN_FINALIZE: usize = usize::MAX;
+const LIFECYCLE_DETACHED_OR_JOINED: usize = usize::MAX;
+const LIFECYCLE_EXITED_OR_FINISHED_OR_JOIN_FINALIZE: usize = usize::MAX;
+// there's no single value for `JOINING`
+
+// 64KiB for 32-bit ISAs, 128KiB for 64-bit ISAs.
+pub const DEFAULT_MIN_STACK_SIZE: usize = 0x4000 * size_of::<usize>();
+
+impl Thread {
+    /// # Safety
+    ///
+    /// See `thread::Builder::spawn_unchecked` for safety requirements.
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let inner = Box::new(ThreadInner {
+            start: UnsafeCell::new(ManuallyDrop::new(p)),
+            lifecycle: AtomicUsize::new(LIFECYCLE_INIT),
+        });
+
+        unsafe extern "C" fn trampoline(exinf: isize) {
+            let p_inner: *mut ThreadInner = crate::ptr::with_exposed_provenance_mut(exinf as usize);
+            // Safety: `ThreadInner` is alive at this point
+            let inner = unsafe { &*p_inner };
+
+            // Safety: Since `trampoline` is called only once for each
+            //         `ThreadInner` and only `trampoline` touches `start`,
+            //         `start` contains contents and is safe to mutably borrow.
+            let p = unsafe { ManuallyDrop::take(&mut *inner.start.get()) };
+            p();
+
+            // Fix the current thread's state just in case, so that the
+            // destructors won't abort
+            // Safety: Not really unsafe
+            let _ = unsafe { abi::unl_cpu() };
+            let _ = unsafe { abi::ena_dsp() };
+
+            // Run TLS destructors now because they are not
+            // called automatically for terminated tasks.
+            unsafe { crate::sys::thread_local::destructors::run() };
+
+            let old_lifecycle = inner
+                .lifecycle
+                .swap(LIFECYCLE_EXITED_OR_FINISHED_OR_JOIN_FINALIZE, Ordering::AcqRel);
+
+            match old_lifecycle {
+                LIFECYCLE_DETACHED => {
+                    // [DETACHED → EXITED]
+                    // No one will ever join, so we'll ask the collector task to
+                    // delete the task.
+
+                    // In this case, `*p_inner`'s ownership has been moved to
+                    // us, and we are responsible for dropping it. The acquire
+                    // ordering ensures that the swap operation that wrote
+                    // `LIFECYCLE_DETACHED` happens-before `Box::from_raw(
+                    // p_inner)`.
+                    // Safety: See above.
+                    let _ = unsafe { Box::from_raw(p_inner) };
+
+                    // Safety: There are no pinned references to the stack
+                    unsafe { terminate_and_delete_current_task() };
+                }
+                LIFECYCLE_INIT => {
+                    // [INIT → FINISHED]
+                    // The parent hasn't decided whether to join or detach this
+                    // thread yet. Whichever option the parent chooses,
+                    // it'll have to delete this task.
+                    // Since the parent might drop `*inner` as soon as it sees
+                    // `FINISHED`, the release ordering must be used in the
+                    // above `swap` call.
+                }
+                parent_tid => {
+                    // Since the parent might drop `*inner` and terminate us as
+                    // soon as it sees `JOIN_FINALIZE`, the release ordering
+                    // must be used in the above `swap` call.
+                    //
+                    // To make the task referred to by `parent_tid` visible, we
+                    // must use the acquire ordering in the above `swap` call.
+
+                    // [JOINING → JOIN_FINALIZE]
+                    // Wake up the parent task.
+                    expect_success(
+                        unsafe {
+                            let mut er = abi::wup_tsk(parent_tid as _);
+                            if er == abi::E_QOVR {
+                                // `E_QOVR` indicates there's already
+                                // a parking token
+                                er = abi::E_OK;
+                            }
+                            er
+                        },
+                        &"wup_tsk",
+                    );
+                }
+            }
+        }
+
+        // Safety: `Box::into_raw` returns a non-null pointer
+        let p_inner = unsafe { NonNull::new_unchecked(Box::into_raw(inner)) };
+
+        let new_task = ItronError::err_if_negative(unsafe {
+            abi::acre_tsk(&abi::T_CTSK {
+                // Activate this task immediately
+                tskatr: abi::TA_ACT,
+                exinf: p_inner.as_ptr().expose_provenance() as abi::EXINF,
+                // The entry point
+                task: Some(trampoline),
+                // Inherit the calling task's base priority
+                itskpri: abi::TPRI_SELF,
+                stksz: stack,
+                // Let the kernel allocate the stack,
+                stk: crate::ptr::null_mut(),
+            })
+        })
+        .map_err(|e| e.as_io_error())?;
+
+        Ok(Self { p_inner, task: new_task })
+    }
+
+    pub fn join(self) {
+        // Safety: `ThreadInner` is alive at this point
+        let inner = unsafe { self.p_inner.as_ref() };
+        // Get the current task ID. Panicking here would cause a resource leak,
+        // so just abort on failure.
+        let current_task = task::current_task_id_aborting();
+        debug_assert!(usize::try_from(current_task).is_ok());
+        debug_assert_ne!(current_task as usize, LIFECYCLE_INIT);
+        debug_assert_ne!(current_task as usize, LIFECYCLE_DETACHED);
+
+        let current_task = current_task as usize;
+
+        match inner.lifecycle.swap(current_task, Ordering::AcqRel) {
+            LIFECYCLE_INIT => {
+                // [INIT → JOINING]
+                // The child task will transition the state to `JOIN_FINALIZE`
+                // and wake us up.
+                //
+                // To make the task referred to by `current_task` visible from
+                // the child task's point of view, we must use the release
+                // ordering in the above `swap` call.
+                loop {
+                    expect_success_aborting(unsafe { abi::slp_tsk() }, &"slp_tsk");
+                    // To synchronize with the child task's memory accesses to
+                    // `inner` up to the point of the assignment of
+                    // `JOIN_FINALIZE`, `Ordering::Acquire` must be used for the
+                    // `load`.
+                    if inner.lifecycle.load(Ordering::Acquire) == LIFECYCLE_JOIN_FINALIZE {
+                        break;
+                    }
+                }
+
+                // [JOIN_FINALIZE → JOINED]
+            }
+            LIFECYCLE_FINISHED => {
+                // [FINISHED → JOINED]
+                // To synchronize with the child task's memory accesses to
+                // `inner` up to the point of the assignment of `FINISHED`,
+                // `Ordering::Acquire` must be used for the above `swap` call.
+            }
+            _ => unsafe { hint::unreachable_unchecked() },
+        }
+
+        // Terminate and delete the task
+        // Safety: `self.task` still represents a task we own (because this
+        //         method or `detach_inner` is called only once for each
+        //         `Thread`). The task indicated that it's safe to delete by
+        //         entering the `FINISHED` or `JOIN_FINALIZE` state.
+        unsafe { terminate_and_delete_task(self.task) };
+
+        // In either case, we are responsible for dropping `inner`.
+        // Safety: The contents of `*p_inner` will not be accessed hereafter
+        let _inner = unsafe { Box::from_raw(self.p_inner.as_ptr()) };
+
+        // Skip the destructor (because it would attempt to detach the thread)
+        crate::mem::forget(self);
+    }
+}
+
+impl Drop for Thread {
+    fn drop(&mut self) {
+        // Safety: `ThreadInner` is alive at this point
+        let inner = unsafe { self.p_inner.as_ref() };
+
+        // Detach the thread.
+        match inner.lifecycle.swap(LIFECYCLE_DETACHED_OR_JOINED, Ordering::AcqRel) {
+            LIFECYCLE_INIT => {
+                // [INIT → DETACHED]
+                // When the time comes, the child will figure out that no
+                // one will ever join it.
+                // The ownership of `*p_inner` is moved to the child thread.
+                // The release ordering ensures that the above swap operation on
+                // `lifecycle` happens-before the child thread's
+                // `Box::from_raw(p_inner)`.
+            }
+            LIFECYCLE_FINISHED => {
+                // [FINISHED → JOINED]
+                // The task has already decided that we should delete the task.
+                // To synchronize with the child task's memory accesses to
+                // `inner` up to the point of the assignment of `FINISHED`,
+                // the acquire ordering is required for the above `swap` call.
+
+                // Terminate and delete the task
+                // Safety: `self.task` still represents a task we own (because
+                //         this method or `join_inner` is called only once for
+                //         each `Thread`). The task indicated that it's safe to
+                //         delete by entering the `FINISHED` state.
+                unsafe { terminate_and_delete_task(self.task) };
+
+                // We are responsible for dropping `*p_inner`.
+                // Safety: The contents of `*p_inner` will not be accessed hereafter
+                let _ = unsafe { Box::from_raw(self.p_inner.as_ptr()) };
+            }
+            _ => unsafe { hint::unreachable_unchecked() },
+        }
+    }
+}
+
+/// Terminates and deletes the specified task.
+///
+/// This function will abort if `deleted_task` refers to the calling task.
+///
+/// It is assumed that the specified task is solely managed by the caller -
+/// i.e., other threads must not "resuscitate" the specified task or delete it
+/// prematurely while this function is still in progress. It is allowed for the
+/// specified task to exit by its own.
+///
+/// # Safety
+///
+/// The task must be safe to terminate. This is in general not true
+/// because there might be pinned references to the task's stack.
+unsafe fn terminate_and_delete_task(deleted_task: abi::ID) {
+    // Terminate the task
+    // Safety: Upheld by the caller
+    match unsafe { abi::ter_tsk(deleted_task) } {
+        // Indicates the task is already dormant, ignore it
+        abi::E_OBJ => {}
+        er => {
+            expect_success_aborting(er, &"ter_tsk");
+        }
+    }
+
+    // Delete the task
+    // Safety: Upheld by the caller
+    expect_success_aborting(unsafe { abi::del_tsk(deleted_task) }, &"del_tsk");
+}
+
+/// Terminates and deletes the calling task.
+///
+/// Atomicity is not required - i.e., it can be assumed that other threads won't
+/// `ter_tsk` the calling task while this function is still in progress. (This
+/// property makes it easy to implement this operation on μITRON-derived kernels
+/// that don't support `exd_tsk`.)
+///
+/// # Safety
+///
+/// The task must be safe to terminate. This is in general not true
+/// because there might be pinned references to the task's stack.
+unsafe fn terminate_and_delete_current_task() -> ! {
+    expect_success_aborting(unsafe { abi::exd_tsk() }, &"exd_tsk");
+    // Safety: `exd_tsk` never returns on success
+    unsafe { crate::hint::unreachable_unchecked() };
+}
+
+pub fn yield_now() {
+    expect_success(unsafe { abi::rot_rdq(abi::TPRI_SELF) }, &"rot_rdq");
+}
+
+pub fn sleep(dur: Duration) {
+    for timeout in dur2reltims(dur) {
+        expect_success(unsafe { abi::dly_tsk(timeout) }, &"dly_tsk");
+    }
+}
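
The core of the SOLID lifecycle above is that both parent and child `swap` their new state into one atomic word and branch on the previous value, so exactly one side ends up responsible for cleanup. A reduced sketch of that idea (a hypothetical three-state version that ignores the `JOINING`/`wup_tsk` handshake):

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

const INIT: usize = 0;
const FINISHED: usize = 1;
const DETACHED: usize = 2;

fn main() {
    let state = Arc::new(AtomicUsize::new(INIT));

    let child = {
        let state = Arc::clone(&state);
        thread::spawn(move || {
            // Child finishes: if the parent detached first, the child cleans up.
            if state.swap(FINISHED, Ordering::AcqRel) == DETACHED {
                println!("child: parent detached first, child frees the state");
            }
        })
    };

    // Parent detaches: if the child finished first, the parent cleans up.
    if state.swap(DETACHED, Ordering::AcqRel) == FINISHED {
        println!("parent: child finished first, parent frees the state");
    }
    child.join().unwrap();
}
```

Whichever side swaps second observes the other's state, so the shared data is freed exactly once; the `AcqRel` ordering mirrors the happens-before reasoning in the comments above.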
diff --git a/library/std/src/sys/thread/teeos.rs b/library/std/src/sys/thread/teeos.rs
new file mode 100644
index 00000000000..cad100395c9
--- /dev/null
+++ b/library/std/src/sys/thread/teeos.rs
@@ -0,0 +1,126 @@
+use crate::mem::{self, ManuallyDrop};
+use crate::sys::os;
+use crate::time::Duration;
+use crate::{cmp, io, ptr};
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 8 * 1024;
+
+unsafe extern "C" {
+    safe fn TEE_Wait(timeout: u32) -> u32;
+}
+
+fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
+    libc::PTHREAD_STACK_MIN.try_into().expect("Infallible")
+}
+
+pub struct Thread {
+    id: libc::pthread_t,
+}
+
+// Some platforms may have pthread_t as a pointer in which case we still want
+// a thread to be Send/Sync
+unsafe impl Send for Thread {}
+unsafe impl Sync for Thread {}
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let p = Box::into_raw(Box::new(p));
+        let mut native: libc::pthread_t = unsafe { mem::zeroed() };
+        let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() };
+        assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0);
+        assert_eq!(
+            unsafe {
+                libc::pthread_attr_settee(
+                    &mut attr,
+                    libc::TEESMP_THREAD_ATTR_CA_INHERIT,
+                    libc::TEESMP_THREAD_ATTR_TASK_ID_INHERIT,
+                    libc::TEESMP_THREAD_ATTR_HAS_SHADOW,
+                )
+            },
+            0,
+        );
+
+        let stack_size = cmp::max(stack, min_stack_size(&attr));
+
+        match unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) } {
+            0 => {}
+            n => {
+                assert_eq!(n, libc::EINVAL);
+                // EINVAL means |stack_size| is either too small or not a
+                // multiple of the system page size.  Because it's definitely
+                // >= PTHREAD_STACK_MIN, it must be an alignment issue.
+                // Round up to the nearest page and try again.
+                let page_size = os::page_size();
+                let stack_size =
+                    (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
+                assert_eq!(unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) }, 0);
+            }
+        };
+
+        let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) };
+        // Note: if the thread creation fails and this assert fails, then p will
+        // be leaked. However, an alternative design could cause double-free
+        // which is clearly worse.
+        assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0);
+
+        return if ret != 0 {
+            // The thread failed to start and as a result p was not consumed. Therefore, it is
+            // safe to reconstruct the box so that it gets deallocated.
+            drop(unsafe { Box::from_raw(p) });
+            Err(io::Error::from_raw_os_error(ret))
+        } else {
+            // The new thread will start running earliest after the next yield.
+            // We add a yield here, so that the user does not have to.
+            yield_now();
+            Ok(Thread { id: native })
+        };
+
+        extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void {
+            unsafe {
+                // Next, set up our stack overflow handler which may get triggered if we run
+                // out of stack.
+                // this is not necessary in TEE.
+                //let _handler = stack_overflow::Handler::new();
+                // Finally, let's run some code.
+                Box::from_raw(main as *mut Box<dyn FnOnce()>)();
+            }
+            ptr::null_mut()
+        }
+    }
+
+    /// Must join, because `pthread_detach` is not supported.
+    pub fn join(self) {
+        let id = self.into_id();
+        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
+        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
+    }
+
+    pub fn into_id(self) -> libc::pthread_t {
+        ManuallyDrop::new(self).id
+    }
+}
+
+impl Drop for Thread {
+    fn drop(&mut self) {
+        // We cannot detach, so panic if the thread is dropped without having been joined.
+        panic!("thread must join, detach is not supported!");
+    }
+}
+
+pub fn yield_now() {
+    let ret = unsafe { libc::sched_yield() };
+    debug_assert_eq!(ret, 0);
+}
+
+/// Only the main thread can wait for some time in TEEOS.
+pub fn sleep(dur: Duration) {
+    let sleep_millis = dur.as_millis();
+    let final_sleep: u32 =
+        if sleep_millis >= u32::MAX as u128 { u32::MAX } else { sleep_millis as u32 };
+    TEE_Wait(final_sleep);
+}
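
The millisecond clamp in the TEEOS `sleep` can be written more compactly with a saturating conversion; a small equivalent sketch:

```rust
use std::time::Duration;

// Clamp a duration to whole milliseconds in a `u32`, saturating at the top;
// equivalent to the manual comparison in `sleep` above.
fn to_millis_saturating(dur: Duration) -> u32 {
    u32::try_from(dur.as_millis()).unwrap_or(u32::MAX)
}

fn main() {
    assert_eq!(to_millis_saturating(Duration::from_millis(1500)), 1500);
    assert_eq!(to_millis_saturating(Duration::MAX), u32::MAX);
}
```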
diff --git a/library/std/src/sys/thread/uefi.rs b/library/std/src/sys/thread/uefi.rs
new file mode 100644
index 00000000000..94f67d7ace2
--- /dev/null
+++ b/library/std/src/sys/thread/uefi.rs
@@ -0,0 +1,25 @@
+use crate::io;
+use crate::num::NonZero;
+use crate::ptr::NonNull;
+use crate::time::Duration;
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    // UEFI is single threaded
+    Ok(NonZero::new(1).unwrap())
+}
+
+pub fn sleep(dur: Duration) {
+    let boot_services: NonNull<r_efi::efi::BootServices> =
+        crate::os::uefi::env::boot_services().expect("can't sleep").cast();
+    let mut dur_us = dur.as_micros();
+    // Round up to the nearest microsecond.
+    if dur.subsec_nanos() % 1000 > 0 {
+        dur_us += 1;
+    }
+
+    while dur_us > 0 {
+        let us = crate::cmp::min(dur_us, usize::MAX as u128);
+        let _ = unsafe { ((*boot_services.as_ptr()).stall)(us as usize) };
+        dur_us -= us;
+    }
+}
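
The UEFI `sleep` rounds the duration up to whole microseconds before feeding it to `stall` in `usize`-sized chunks. The ceiling step in isolation:

```rust
use std::time::Duration;

// Ceiling conversion to whole microseconds, as in the UEFI `sleep` above.
fn micros_ceil(dur: Duration) -> u128 {
    dur.as_micros() + if dur.subsec_nanos() % 1_000 > 0 { 1 } else { 0 }
}

fn main() {
    assert_eq!(micros_ceil(Duration::new(0, 1_500)), 2); // 1.5 µs rounds up
    assert_eq!(micros_ceil(Duration::new(0, 2_000)), 2); // exact value unchanged
}
```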
diff --git a/library/std/src/sys/thread/unix.rs b/library/std/src/sys/thread/unix.rs
new file mode 100644
index 00000000000..2d2c4f90212
--- /dev/null
+++ b/library/std/src/sys/thread/unix.rs
@@ -0,0 +1,908 @@
+#[cfg(not(any(
+    target_env = "newlib",
+    target_os = "l4re",
+    target_os = "emscripten",
+    target_os = "redox",
+    target_os = "hurd",
+    target_os = "aix",
+)))]
+use crate::ffi::CStr;
+use crate::mem::{self, ManuallyDrop};
+use crate::num::NonZero;
+#[cfg(all(target_os = "linux", target_env = "gnu"))]
+use crate::sys::weak::dlsym;
+#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
+use crate::sys::weak::weak;
+use crate::sys::{os, stack_overflow};
+use crate::time::Duration;
+use crate::{cmp, io, ptr};
+#[cfg(not(any(
+    target_os = "l4re",
+    target_os = "vxworks",
+    target_os = "espidf",
+    target_os = "nuttx"
+)))]
+pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
+#[cfg(target_os = "l4re")]
+pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
+#[cfg(target_os = "vxworks")]
+pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
+#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
+pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
+
+struct ThreadData {
+    name: Option<Box<str>>,
+    f: Box<dyn FnOnce()>,
+}
+
+pub struct Thread {
+    id: libc::pthread_t,
+}
+
+// Some platforms may have pthread_t as a pointer in which case we still want
+// a thread to be Send/Sync
+unsafe impl Send for Thread {}
+unsafe impl Sync for Thread {}
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub unsafe fn new(
+        stack: usize,
+        name: Option<&str>,
+        f: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let data = Box::into_raw(Box::new(ThreadData { name: name.map(Box::from), f }));
+        let mut native: libc::pthread_t = mem::zeroed();
+        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
+        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
+
+        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
+        if stack > 0 {
+            // Only set the stack if a non-zero value is passed
+            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
+            assert_eq!(
+                libc::pthread_attr_setstacksize(
+                    attr.as_mut_ptr(),
+                    cmp::max(stack, min_stack_size(attr.as_ptr()))
+                ),
+                0
+            );
+        }
+
+        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
+        {
+            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));
+
+            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
+                0 => {}
+                n => {
+                    assert_eq!(n, libc::EINVAL);
+                    // EINVAL means |stack_size| is either too small or not a
+                    // multiple of the system page size. Because it's definitely
+                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
+                    // Round up to the nearest page and try again.
+                    let page_size = os::page_size();
+                    let stack_size =
+                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
+
+                    // Some libc implementations, e.g. musl, place an upper bound
+                    // on the stack size, in which case we can only gracefully return
+                    // an error here.
+                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
+                        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
+                        drop(Box::from_raw(data));
+                        return Err(io::const_error!(
+                            io::ErrorKind::InvalidInput,
+                            "invalid stack size"
+                        ));
+                    }
+                }
+            };
+        }
+
+        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
+        // Note: if the thread creation fails and this assert fails, then `data` will
+        // be leaked. However, an alternative design could cause double-free
+        // which is clearly worse.
+        assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
+
+        return if ret != 0 {
+            // The thread failed to start and as a result p was not consumed. Therefore, it is
+            // safe to reconstruct the box so that it gets deallocated.
+            drop(Box::from_raw(data));
+            Err(io::Error::from_raw_os_error(ret))
+        } else {
+            Ok(Thread { id: native })
+        };
+
+        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
+            unsafe {
+                let data = Box::from_raw(data as *mut ThreadData);
+                // Next, set up our stack overflow handler which may get triggered if we run
+                // out of stack.
+                let _handler = stack_overflow::Handler::new(data.name);
+                // Finally, let's run some code.
+                (data.f)();
+            }
+            ptr::null_mut()
+        }
+    }
+
+    pub fn join(self) {
+        let id = self.into_id();
+        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
+        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
+    }
+
+    pub fn id(&self) -> libc::pthread_t {
+        self.id
+    }
+
+    pub fn into_id(self) -> libc::pthread_t {
+        ManuallyDrop::new(self).id
+    }
+}
+
+impl Drop for Thread {
+    fn drop(&mut self) {
+        let ret = unsafe { libc::pthread_detach(self.id) };
+        debug_assert_eq!(ret, 0);
+    }
+}
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    cfg_select! {
+        any(
+            target_os = "android",
+            target_os = "emscripten",
+            target_os = "fuchsia",
+            target_os = "hurd",
+            target_os = "linux",
+            target_os = "aix",
+            target_vendor = "apple",
+            target_os = "cygwin",
+        ) => {
+            #[allow(unused_assignments)]
+            #[allow(unused_mut)]
+            let mut quota = usize::MAX;
+
+            #[cfg(any(target_os = "android", target_os = "linux"))]
+            {
+                quota = cgroups::quota().max(1);
+                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
+                unsafe {
+                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
+                        let count = libc::CPU_COUNT(&set) as usize;
+                        let count = count.min(quota);
+
+                        // According to sched_getaffinity's API it should always be non-zero, but
+                        // some old MIPS kernels were buggy and zero-initialized the mask if
+                        // none was explicitly set.
+                        // In that case we use the sysconf fallback.
+                        if let Some(count) = NonZero::new(count) {
+                            return Ok(count)
+                        }
+                    }
+                }
+            }
+            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
+                -1 => Err(io::Error::last_os_error()),
+                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
+                cpus => {
+                    let count = cpus as usize;
+                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
+                    let count = count.min(quota);
+                    Ok(unsafe { NonZero::new_unchecked(count) })
+                }
+            }
+        }
+        any(
+           target_os = "freebsd",
+           target_os = "dragonfly",
+           target_os = "openbsd",
+           target_os = "netbsd",
+        ) => {
+            use crate::ptr;
+
+            #[cfg(target_os = "freebsd")]
+            {
+                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
+                unsafe {
+                    if libc::cpuset_getaffinity(
+                        libc::CPU_LEVEL_WHICH,
+                        libc::CPU_WHICH_PID,
+                        -1,
+                        size_of::<libc::cpuset_t>(),
+                        &mut set,
+                    ) == 0 {
+                        let count = libc::CPU_COUNT(&set) as usize;
+                        if count > 0 {
+                            return Ok(NonZero::new_unchecked(count));
+                        }
+                    }
+                }
+            }
+
+            #[cfg(target_os = "netbsd")]
+            {
+                unsafe {
+                    let set = libc::_cpuset_create();
+                    if !set.is_null() {
+                        let mut count: usize = 0;
+                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
+                            for i in 0..libc::cpuid_t::MAX {
+                                match libc::_cpuset_isset(i, set) {
+                                    -1 => break,
+                                    0 => continue,
+                                    _ => count = count + 1,
+                                }
+                            }
+                        }
+                        libc::_cpuset_destroy(set);
+                        if let Some(count) = NonZero::new(count) {
+                            return Ok(count);
+                        }
+                    }
+                }
+            }
+
+            let mut cpus: libc::c_uint = 0;
+            let mut cpus_size = size_of_val(&cpus);
+
+            unsafe {
+                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
+            }
+
+            // Fallback approach in case of errors or no hardware threads.
+            if cpus < 1 {
+                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
+                let res = unsafe {
+                    libc::sysctl(
+                        mib.as_mut_ptr(),
+                        2,
+                        (&raw mut cpus) as *mut _,
+                        (&raw mut cpus_size) as *mut _,
+                        ptr::null_mut(),
+                        0,
+                    )
+                };
+
+                // Handle errors if any.
+                if res == -1 {
+                    return Err(io::Error::last_os_error());
+                } else if cpus == 0 {
+                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
+                }
+            }
+
+            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
+        }
+        target_os = "nto" => {
+            unsafe {
+                use libc::_syspage_ptr;
+                if _syspage_ptr.is_null() {
+                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
+                } else {
+                    let cpus = (*_syspage_ptr).num_cpu;
+                    NonZero::new(cpus as usize)
+                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
+                }
+            }
+        }
+        any(target_os = "solaris", target_os = "illumos") => {
+            let mut cpus = 0u32;
+            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
+                return Err(io::Error::UNKNOWN_THREAD_COUNT);
+            }
+            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
+        }
+        target_os = "haiku" => {
+            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
+            // `get_system_info` calls then `smp_get_num_cpus`
+            unsafe {
+                let mut sinfo: libc::system_info = crate::mem::zeroed();
+                let res = libc::get_system_info(&mut sinfo);
+
+                if res != libc::B_OK {
+                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
+                }
+
+                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
+            }
+        }
+        target_os = "vxworks" => {
+            // Note: there is also `vxCpuConfiguredGet`, which is closer to
+            // `_SC_NPROCESSORS_CONF` semantics than to the actually available cores.
+            unsafe extern "C" {
+                fn vxCpuEnabledGet() -> libc::cpuset_t;
+            }
+
+            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
+            unsafe {
+                let set = vxCpuEnabledGet();
+                Ok(NonZero::new_unchecked(set.count_ones() as usize))
+            }
+        }
+        _ => {
+            // FIXME: implement on Redox, l4re
+            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
+        }
+    }
+}
+
+pub fn current_os_id() -> Option<u64> {
+    // Most Unix platforms have a way to query an integer ID of the current thread, all with
+    // slightly different spellings.
+    //
+    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
+    // for process inspection (debuggers, trace, `top`, etc.).
+    cfg_select! {
+        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
+        any(target_os = "android", target_os = "linux") => {
+            use crate::sys::pal::weak::syscall;
+
+            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
+            // since Linux 2.4.11.
+            syscall!(fn gettid() -> libc::pid_t;);
+
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::pid_t = unsafe { gettid() };
+            Some(id as u64)
+        }
+        target_os = "nto" => {
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::pid_t = unsafe { libc::gettid() };
+            Some(id as u64)
+        }
+        target_os = "openbsd" => {
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::pid_t = unsafe { libc::getthrid() };
+            Some(id as u64)
+        }
+        target_os = "freebsd" => {
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
+            Some(id as u64)
+        }
+        target_os = "netbsd" => {
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
+            Some(id as u64)
+        }
+        any(target_os = "illumos", target_os = "solaris") => {
+            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
+            // SAFETY: FFI call with no preconditions.
+            let id: libc::pthread_t = unsafe { libc::pthread_self() };
+            Some(id as u64)
+        }
+        target_vendor = "apple" => {
+            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
+            let mut id = 0u64;
+            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
+            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
+            if status == 0 {
+                Some(id)
+            } else {
+                None
+            }
+        }
+        // Other platforms don't have an OS thread ID or don't have a way to access it.
+        _ => None,
+    }
+}
+
+#[cfg(any(
+    target_os = "linux",
+    target_os = "nto",
+    target_os = "solaris",
+    target_os = "illumos",
+    target_os = "vxworks",
+    target_os = "cygwin",
+    target_vendor = "apple",
+))]
+fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
+    let mut result = [0; MAX_WITH_NUL];
+    for (src, dst) in cstr.to_bytes().iter().zip(&mut result[..MAX_WITH_NUL - 1]) {
+        *dst = *src as libc::c_char;
+    }
+    result
+}
+
+#[cfg(target_os = "android")]
+pub fn set_name(name: &CStr) {
+    const PR_SET_NAME: libc::c_int = 15;
+    unsafe {
+        let res = libc::prctl(
+            PR_SET_NAME,
+            name.as_ptr(),
+            0 as libc::c_ulong,
+            0 as libc::c_ulong,
+            0 as libc::c_ulong,
+        );
+        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
+        debug_assert_eq!(res, 0);
+    }
+}
+
+#[cfg(any(
+    target_os = "linux",
+    target_os = "freebsd",
+    target_os = "dragonfly",
+    target_os = "nuttx",
+    target_os = "cygwin"
+))]
+pub fn set_name(name: &CStr) {
+    unsafe {
+        cfg_select! {
+            any(target_os = "linux", target_os = "cygwin") => {
+                // Linux and Cygwin limits the allowed length of the name.
+                const TASK_COMM_LEN: usize = 16;
+                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
+            }
+            _ => {
+                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
+            }
+        };
+        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
+        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
+        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
+        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
+        debug_assert_eq!(res, 0);
+    }
+}
+
+#[cfg(target_os = "openbsd")]
+pub fn set_name(name: &CStr) {
+    unsafe {
+        libc::pthread_set_name_np(libc::pthread_self(), name.as_ptr());
+    }
+}
+
+#[cfg(target_vendor = "apple")]
+pub fn set_name(name: &CStr) {
+    unsafe {
+        let name = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
+        let res = libc::pthread_setname_np(name.as_ptr());
+        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
+        debug_assert_eq!(res, 0);
+    }
+}
+
+#[cfg(target_os = "netbsd")]
+pub fn set_name(name: &CStr) {
+    unsafe {
+        let res = libc::pthread_setname_np(
+            libc::pthread_self(),
+            c"%s".as_ptr(),
+            name.as_ptr() as *mut libc::c_void,
+        );
+        debug_assert_eq!(res, 0);
+    }
+}
+
+#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
+pub fn set_name(name: &CStr) {
+    weak!(
+        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
+    );
+
+    if let Some(f) = pthread_setname_np.get() {
+        #[cfg(target_os = "nto")]
+        const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
+        #[cfg(any(target_os = "solaris", target_os = "illumos"))]
+        const THREAD_NAME_MAX: usize = 32;
+
+        let name = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
+        let res = unsafe { f(libc::pthread_self(), name.as_ptr()) };
+        debug_assert_eq!(res, 0);
+    }
+}
+
+#[cfg(target_os = "fuchsia")]
+pub fn set_name(name: &CStr) {
+    use crate::sys::pal::fuchsia::*;
+    unsafe {
+        zx_object_set_property(
+            zx_thread_self(),
+            ZX_PROP_NAME,
+            name.as_ptr() as *const libc::c_void,
+            name.to_bytes().len(),
+        );
+    }
+}
+
+#[cfg(target_os = "haiku")]
+pub fn set_name(name: &CStr) {
+    unsafe {
+        let thread_self = libc::find_thread(ptr::null_mut());
+        let res = libc::rename_thread(thread_self, name.as_ptr());
+        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
+        debug_assert_eq!(res, libc::B_OK);
+    }
+}
+
+#[cfg(target_os = "vxworks")]
+pub fn set_name(name: &CStr) {
+    let mut name = truncate_cstr::<{ (libc::VX_TASK_RENAME_LENGTH - 1) as usize }>(name);
+    let res = unsafe { libc::taskNameSet(libc::taskIdSelf(), name.as_mut_ptr()) };
+    debug_assert_eq!(res, libc::OK);
+}
+
+#[cfg(not(target_os = "espidf"))]
+pub fn sleep(dur: Duration) {
+    let mut secs = dur.as_secs();
+    let mut nsecs = dur.subsec_nanos() as _;
+
+    // If we're awoken with a signal then the return value will be -1 and
+    // nanosleep will fill in `ts` with the remaining time.
+    unsafe {
+        while secs > 0 || nsecs > 0 {
+            let mut ts = libc::timespec {
+                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
+                tv_nsec: nsecs,
+            };
+            secs -= ts.tv_sec as u64;
+            let ts_ptr = &raw mut ts;
+            if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
+                assert_eq!(os::errno(), libc::EINTR);
+                secs += ts.tv_sec as u64;
+                nsecs = ts.tv_nsec;
+            } else {
+                nsecs = 0;
+            }
+        }
+    }
+}
+
+#[cfg(target_os = "espidf")]
+pub fn sleep(dur: Duration) {
+    // ESP-IDF does not have `nanosleep`, so we use `usleep` instead.
+    // As per the documentation of `usleep`, it is expected to support
+    // sleep times as big as at least up to 1 second.
+    //
+    // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
+    // `usleep` implementation
+    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
+    // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
+    // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
+    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;
+
+    // Add any nanoseconds smaller than a microsecond as an extra microsecond
+    // so as to comply with the `std::thread::sleep` contract which mandates
+    // implementations to sleep for _at least_ the provided `dur`.
+    // We can't overflow `micros` as it is a `u128`, while `Duration` is a pair of
+    // (`u64` secs, `u32` nanos), where the nanos are strictly smaller than 1 second
+    // (i.e. < 1_000_000_000)
+    let mut micros = dur.as_micros() + if dur.subsec_nanos() % 1_000 > 0 { 1 } else { 0 };
+
+    while micros > 0 {
+        let st = if micros > MAX_MICROS as u128 { MAX_MICROS } else { micros as u32 };
+        unsafe {
+            libc::usleep(st);
+        }
+
+        micros -= st as u128;
+    }
+}
+
+// Any Unix that has `clock_nanosleep`.
+// If this list changes, update the Miri `clock_nanosleep` shim.
+#[cfg(any(
+    target_os = "freebsd",
+    target_os = "netbsd",
+    target_os = "linux",
+    target_os = "android",
+    target_os = "solaris",
+    target_os = "illumos",
+    target_os = "dragonfly",
+    target_os = "hurd",
+    target_os = "fuchsia",
+    target_os = "vxworks",
+))]
+pub fn sleep_until(deadline: crate::time::Instant) {
+    use crate::time::Instant;
+
+    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
+        // The deadline is further in the future than can be passed to
+        // `clock_nanosleep`, so we have to fall back to `sleep` instead. This
+        // might happen on 32-bit platforms, especially closer to 2038.
+        let now = Instant::now();
+        if let Some(delay) = deadline.checked_duration_since(now) {
+            sleep(delay);
+        }
+        return;
+    };
+
+    unsafe {
+        // When we get interrupted (res = EINTR), call `clock_nanosleep` again.
+        loop {
+            let res = libc::clock_nanosleep(
+                crate::sys::time::Instant::CLOCK_ID,
+                libc::TIMER_ABSTIME,
+                &ts,
+                core::ptr::null_mut(), // not required with TIMER_ABSTIME
+            );
+
+            if res == 0 {
+                break;
+            } else {
+                assert_eq!(
+                    res,
+                    libc::EINTR,
+                    "timespec is in range, clockid is valid and kernel should support it"
+                );
+            }
+        }
+    }
+}
+
+pub fn yield_now() {
+    let ret = unsafe { libc::sched_yield() };
+    debug_assert_eq!(ret, 0);
+}
+
+#[cfg(any(target_os = "android", target_os = "linux"))]
+mod cgroups {
+    //! Currently not covered
+    //! * cgroup v2 in non-standard mountpoints
+    //! * paths containing control characters or spaces, since those would be escaped in procfs
+    //!   output and we don't unescape
+
+    use crate::borrow::Cow;
+    use crate::ffi::OsString;
+    use crate::fs::{File, exists};
+    use crate::io::{BufRead, Read};
+    use crate::os::unix::ffi::OsStringExt;
+    use crate::path::{Path, PathBuf};
+    use crate::str::from_utf8;
+
+    #[derive(PartialEq)]
+    enum Cgroup {
+        V1,
+        V2,
+    }
+
+    /// Returns the cgroup CPU quota in core-equivalents, rounded down, or
+    /// `usize::MAX` if the quota cannot be determined or is not set.
+    pub(super) fn quota() -> usize {
+        let mut quota = usize::MAX;
+        if cfg!(miri) {
+            // Attempting to open a file fails under default flags due to isolation.
+            // And Miri does not have parallelism anyway.
+            return quota;
+        }
+
+        let _: Option<()> = try {
+            let mut buf = Vec::with_capacity(128);
+            // find our place in the cgroup hierarchy
+            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
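+            // Each line has the form `ID:CONTROLLER_LIST:PATH`, e.g.
+            // `7:cpu,cpuacct:/user.slice` (v1) or `0::/user.slice` (v2).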
+            let (cgroup_path, version) =
+                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
+                    let mut fields = line.splitn(3, |&c| c == b':');
+                    // 2nd field is a list of controllers for v1 or empty for v2
+                    let version = match fields.nth(1) {
+                        Some(b"") => Cgroup::V2,
+                        Some(controllers)
+                            if from_utf8(controllers)
+                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
+                        {
+                            Cgroup::V1
+                        }
+                        _ => return previous,
+                    };
+
+                    // already-found v1 trumps v2 since it explicitly specifies its controllers
+                    if previous.is_some() && version == Cgroup::V2 {
+                        return previous;
+                    }
+
+                    let path = fields.last()?;
+                    // skip leading slash
+                    Some((path[1..].to_owned(), version))
+                })?;
+            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
+
+            quota = match version {
+                Cgroup::V1 => quota_v1(cgroup_path),
+                Cgroup::V2 => quota_v2(cgroup_path),
+            };
+        };
+
+        quota
+    }
+
+    fn quota_v2(group_path: PathBuf) -> usize {
+        let mut quota = usize::MAX;
+
+        let mut path = PathBuf::with_capacity(128);
+        let mut read_buf = String::with_capacity(20);
+
+        // standard mount location defined in file-hierarchy(7) manpage
+        let cgroup_mount = "/sys/fs/cgroup";
+
+        path.push(cgroup_mount);
+        path.push(&group_path);
+
+        path.push("cgroup.controllers");
+
+        // skip if we're not looking at cgroup2
+        if matches!(exists(&path), Err(_) | Ok(false)) {
+            return usize::MAX;
+        };
+
+        path.pop();
+
+        let _: Option<()> = try {
+            while path.starts_with(cgroup_mount) {
+                path.push("cpu.max");
+
+                read_buf.clear();
+
+                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
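+                    // `cpu.max` contains `$MAX $PERIOD`, e.g. `200000 100000`;
+                    // `$MAX` is the literal string `max` when no limit is set,
+                    // in which case the parse below fails and the quota is
+                    // left unchanged.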
+                    let raw_quota = read_buf.lines().next()?;
+                    let mut raw_quota = raw_quota.split(' ');
+                    let limit = raw_quota.next()?;
+                    let period = raw_quota.next()?;
+                    match (limit.parse::<usize>(), period.parse::<usize>()) {
+                        (Ok(limit), Ok(period)) if period > 0 => {
+                            quota = quota.min(limit / period);
+                        }
+                        _ => {}
+                    }
+                }
+
+                path.pop(); // pop filename
+                path.pop(); // pop dir
+            }
+        };
+
+        quota
+    }
+
+    fn quota_v1(group_path: PathBuf) -> usize {
+        let mut quota = usize::MAX;
+        let mut path = PathBuf::with_capacity(128);
+        let mut read_buf = String::with_capacity(20);
+
+        // Hardcode the commonly used locations mentioned in the cgroups(7) manpage;
+        // if those don't work, scan mountinfo and adjust `group_path` for bind-mounts.
+        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
+            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
+            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
+            // this can be expensive on systems with tons of mountpoints,
+            // but we only get to this point when /proc/self/cgroup explicitly indicated
+            // that this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
+            find_mountpoint,
+        ];
+
+        for mount in mounts {
+            let Some((mount, group_path)) = mount(&group_path) else { continue };
+
+            path.clear();
+            path.push(mount.as_ref());
+            path.push(&group_path);
+
+            // skip if we guessed the mount incorrectly
+            if matches!(exists(&path), Err(_) | Ok(false)) {
+                continue;
+            }
+
+            while path.starts_with(mount.as_ref()) {
+                let mut parse_file = |name| {
+                    path.push(name);
+                    read_buf.clear();
+
+                    let f = File::open(&path);
+                    path.pop(); // restore buffer before any early returns
+                    f.ok()?.read_to_string(&mut read_buf).ok()?;
+                    let parsed = read_buf.trim().parse::<usize>().ok()?;
+
+                    Some(parsed)
+                };
+
+                let limit = parse_file("cpu.cfs_quota_us");
+                let period = parse_file("cpu.cfs_period_us");
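+                // `cpu.cfs_quota_us` is -1 when there is no limit, which fails
+                // the `usize` parse in `parse_file` and leaves `quota` untouched.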
+
+                match (limit, period) {
+                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
+                    _ => {}
+                }
+
+                path.pop();
+            }
+
+            // we passed the existence check above, so we should have traversed the
+            // correct hierarchy when reaching this line
+            break;
+        }
+
+        quota
+    }
+
+    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
+    ///
+    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
+    /// over the already-included prefix
+    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
+        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
+        let mut line = String::with_capacity(256);
+        loop {
+            line.clear();
+            if reader.read_line(&mut line).ok()? == 0 {
+                break;
+            }
+
+            let line = line.trim();
+            let mut items = line.split(' ');
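+            // Relevant mountinfo fields (see proc(5)): field 4 is the root of
+            // the mount within the filesystem, field 5 is the mount point, the
+            // last field holds the superblock options, and the third-to-last
+            // field is the filesystem type.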
+
+            let sub_path = items.nth(3)?;
+            let mount_point = items.next()?;
+            let mount_opts = items.next_back()?;
+            let filesystem_type = items.nth_back(1)?;
+
+            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
+                // not a cgroup / not a cpu-controller
+                continue;
+            }
+
+            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
+
+            if !group_path.starts_with(sub_path) {
+                // this is a bind-mount and the bound subdirectory
+                // does not contain the cgroup this process belongs to
+                continue;
+            }
+
+            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
+
+            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
+        }
+
+        None
+    }
+}
+
+// glibc >= 2.15 has a __pthread_get_minstack() function that returns
+// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
+// We need that information to avoid blowing up when a small stack
+// is created in an application with big thread-local storage requirements.
+// See #6233 for rationale and details.
+#[cfg(all(target_os = "linux", target_env = "gnu"))]
+unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
+    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
+    // We shouldn't really be using such an internal symbol, but there's currently
+    // no other way to account for the TLS size.
+    dlsym!(
+        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
+    );
+
+    match __pthread_get_minstack.get() {
+        None => libc::PTHREAD_STACK_MIN,
+        Some(f) => unsafe { f(attr) },
+    }
+}
+
+// No point in looking up __pthread_get_minstack() on non-glibc platforms.
+#[cfg(all(
+    not(all(target_os = "linux", target_env = "gnu")),
+    not(any(target_os = "netbsd", target_os = "nuttx"))
+))]
+unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
+    libc::PTHREAD_STACK_MIN
+}
+
+#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
+unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
+    static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();
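+    // The minimum stack size does not change while the process is running,
+    // so query it once and cache the result.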
+
+    *STACK.get_or_init(|| {
+        let mut stack = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
+        if stack < 0 {
+            stack = 2048; // just a guess
+        }
+
+        stack as usize
+    })
+}
diff --git a/library/std/src/sys/thread/unsupported.rs b/library/std/src/sys/thread/unsupported.rs
new file mode 100644
index 00000000000..a5001efa3b4
--- /dev/null
+++ b/library/std/src/sys/thread/unsupported.rs
@@ -0,0 +1,43 @@
+use crate::ffi::CStr;
+use crate::io;
+use crate::num::NonZero;
+use crate::time::Duration;
+
+pub struct Thread(!);
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024;
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    pub unsafe fn new(
+        _stack: usize,
+        _name: Option<&str>,
+        _p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        Err(io::Error::UNSUPPORTED_PLATFORM)
+    }
+
+    pub fn join(self) {
+        self.0
+    }
+}
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    Err(io::Error::UNKNOWN_THREAD_COUNT)
+}
+
+pub fn current_os_id() -> Option<u64> {
+    None
+}
+
+pub fn yield_now() {
+    // do nothing
+}
+
+pub fn set_name(_name: &CStr) {
+    // nope
+}
+
+pub fn sleep(_dur: Duration) {
+    panic!("can't sleep");
+}
diff --git a/library/std/src/sys/thread/wasip1.rs b/library/std/src/sys/thread/wasip1.rs
new file mode 100644
index 00000000000..83001fad49c
--- /dev/null
+++ b/library/std/src/sys/thread/wasip1.rs
@@ -0,0 +1,185 @@
+#![forbid(unsafe_op_in_unsafe_fn)]
+
+#[cfg(target_feature = "atomics")]
+use crate::io;
+use crate::mem;
+#[cfg(target_feature = "atomics")]
+use crate::num::NonZero;
+#[cfg(target_feature = "atomics")]
+use crate::sys::os;
+use crate::time::Duration;
+#[cfg(target_feature = "atomics")]
+use crate::{cmp, ptr};
+
+// Add a few symbols not in upstream `libc` just yet.
+#[cfg(target_feature = "atomics")]
+mod libc {
+    pub use libc::*;
+
+    pub use crate::ffi;
+
+    // defined in wasi-libc
+    // https://github.com/WebAssembly/wasi-libc/blob/a6f871343313220b76009827ed0153586361c0d5/libc-top-half/musl/include/alltypes.h.in#L108
+    #[repr(C)]
+    union pthread_attr_union {
+        __i: [ffi::c_int; if size_of::<ffi::c_long>() == 8 { 14 } else { 9 }],
+        __vi: [ffi::c_int; if size_of::<ffi::c_long>() == 8 { 14 } else { 9 }],
+        __s: [ffi::c_ulong; if size_of::<ffi::c_long>() == 8 { 7 } else { 9 }],
+    }
+
+    #[repr(C)]
+    pub struct pthread_attr_t {
+        __u: pthread_attr_union,
+    }
+
+    #[allow(non_camel_case_types)]
+    pub type pthread_t = *mut ffi::c_void;
+
+    pub const _SC_NPROCESSORS_ONLN: ffi::c_int = 84;
+
+    unsafe extern "C" {
+        pub fn pthread_create(
+            native: *mut pthread_t,
+            attr: *const pthread_attr_t,
+            f: extern "C" fn(*mut ffi::c_void) -> *mut ffi::c_void,
+            value: *mut ffi::c_void,
+        ) -> ffi::c_int;
+        pub fn pthread_join(native: pthread_t, value: *mut *mut ffi::c_void) -> ffi::c_int;
+        pub fn pthread_attr_init(attrp: *mut pthread_attr_t) -> ffi::c_int;
+        pub fn pthread_attr_setstacksize(
+            attr: *mut pthread_attr_t,
+            stack_size: libc::size_t,
+        ) -> ffi::c_int;
+        pub fn pthread_attr_destroy(attr: *mut pthread_attr_t) -> ffi::c_int;
+        pub fn pthread_detach(thread: pthread_t) -> ffi::c_int;
+    }
+}
+
+#[cfg(target_feature = "atomics")]
+pub struct Thread {
+    id: libc::pthread_t,
+}
+
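+// Detach the thread when its handle is dropped without being joined, so
+// that its resources are eventually released by the system.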
+#[cfg(target_feature = "atomics")]
+impl Drop for Thread {
+    fn drop(&mut self) {
+        let ret = unsafe { libc::pthread_detach(self.id) };
+        debug_assert_eq!(ret, 0);
+    }
+}
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
+
+#[cfg(target_feature = "atomics")]
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let p = Box::into_raw(Box::new(p));
+        let mut native: libc::pthread_t = unsafe { mem::zeroed() };
+        let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() };
+        assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0);
+
+        let stack_size = cmp::max(stack, DEFAULT_MIN_STACK_SIZE);
+
+        match unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) } {
+            0 => {}
+            n => {
+                assert_eq!(n, libc::EINVAL);
+                // EINVAL means `stack_size` is either too small or not a
+                // multiple of the system page size. Because it's definitely
+                // >= PTHREAD_STACK_MIN, it must be an alignment issue.
+                // Round up to the nearest page and try again.
+                let page_size = os::page_size();
+                let stack_size = (stack_size + page_size - 1) & !(page_size - 1);
+                assert_eq!(unsafe { libc::pthread_attr_setstacksize(&mut attr, stack_size) }, 0);
+            }
+        };
+
+        let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) };
+        // Note: if the thread creation fails and this assert fails, then p will
+        // be leaked. However, an alternative design could cause double-free
+        // which is clearly worse.
+        assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0);
+
+        return if ret != 0 {
+            // The thread failed to start and as a result p was not consumed. Therefore, it is
+            // safe to reconstruct the box so that it gets deallocated.
+            unsafe {
+                drop(Box::from_raw(p));
+            }
+            Err(io::Error::from_raw_os_error(ret))
+        } else {
+            Ok(Thread { id: native })
+        };
+
+        extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void {
+            unsafe {
+                // Finally, let's run some code.
+                Box::from_raw(main as *mut Box<dyn FnOnce()>)();
+            }
+            ptr::null_mut()
+        }
+    }
+
+    pub fn join(self) {
+        let id = mem::ManuallyDrop::new(self).id;
+        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
+        if ret != 0 {
+            rtabort!("failed to join thread: {}", io::Error::from_raw_os_error(ret));
+        }
+    }
+}
+
+#[cfg(target_feature = "atomics")]
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
+        -1 => Err(io::Error::last_os_error()),
+        cpus => NonZero::new(cpus as usize).ok_or(io::Error::UNKNOWN_THREAD_COUNT),
+    }
+}
+
+pub fn yield_now() {
+    let ret = unsafe { wasi::sched_yield() };
+    debug_assert_eq!(ret, Ok(()));
+}
+
+pub fn sleep(dur: Duration) {
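+    // WASI preview 1 has no dedicated sleep call, so block on `poll_oneoff`
+    // with a single relative monotonic-clock subscription, repeating until
+    // the full duration (which can exceed `u64::MAX` nanoseconds) has elapsed.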
+    let mut nanos = dur.as_nanos();
+    while nanos > 0 {
+        const USERDATA: wasi::Userdata = 0x0123_45678;
+
+        let clock = wasi::SubscriptionClock {
+            id: wasi::CLOCKID_MONOTONIC,
+            timeout: u64::try_from(nanos).unwrap_or(u64::MAX),
+            precision: 0,
+            flags: 0,
+        };
+        nanos -= u128::from(clock.timeout);
+
+        let in_ = wasi::Subscription {
+            userdata: USERDATA,
+            u: wasi::SubscriptionU { tag: 0, u: wasi::SubscriptionUU { clock } },
+        };
+        unsafe {
+            let mut event: wasi::Event = mem::zeroed();
+            let res = wasi::poll_oneoff(&in_, &mut event, 1);
+            match (res, event) {
+                (
+                    Ok(1),
+                    wasi::Event {
+                        userdata: USERDATA,
+                        error: wasi::ERRNO_SUCCESS,
+                        type_: wasi::EVENTTYPE_CLOCK,
+                        ..
+                    },
+                ) => {}
+                _ => panic!("thread::sleep(): unexpected result of poll_oneoff"),
+            }
+        }
+    }
+}
diff --git a/library/std/src/sys/thread/wasip2.rs b/library/std/src/sys/thread/wasip2.rs
new file mode 100644
index 00000000000..420cad2a5e4
--- /dev/null
+++ b/library/std/src/sys/thread/wasip2.rs
@@ -0,0 +1,32 @@
+use crate::time::{Duration, Instant};
+
+pub fn sleep(dur: Duration) {
+    // Sleep in increments of `u64::MAX` nanoseconds until the `dur` is
+    // entirely drained.
+    let mut remaining = dur.as_nanos();
+    while remaining > 0 {
+        let amt = u64::try_from(remaining).unwrap_or(u64::MAX);
+        wasip2::clocks::monotonic_clock::subscribe_duration(amt).block();
+        remaining -= u128::from(amt);
+    }
+}
+
+pub fn sleep_until(deadline: Instant) {
+    match u64::try_from(deadline.into_inner().as_duration().as_nanos()) {
+        // If the point in time we're sleeping to fits within a 64-bit
+        // number of nanoseconds then directly use `subscribe_instant`.
+        Ok(deadline) => {
+            wasip2::clocks::monotonic_clock::subscribe_instant(deadline).block();
+        }
+        // ... otherwise we're sleeping for 500+ years relative to the
+        // "start" of what the system is using as a clock, so speed/accuracy
+        // is not much of a concern. Use `sleep` instead.
+        Err(_) => {
+            let now = Instant::now();
+
+            if let Some(delay) = deadline.checked_duration_since(now) {
+                sleep(delay);
+            }
+        }
+    }
+}
diff --git a/library/std/src/sys/thread/wasm.rs b/library/std/src/sys/thread/wasm.rs
new file mode 100644
index 00000000000..e843bc992ba
--- /dev/null
+++ b/library/std/src/sys/thread/wasm.rs
@@ -0,0 +1,23 @@
+use crate::cmp;
+use crate::time::Duration;
+
+pub fn sleep(dur: Duration) {
+    #[cfg(target_arch = "wasm32")]
+    use core::arch::wasm32 as wasm;
+    #[cfg(target_arch = "wasm64")]
+    use core::arch::wasm64 as wasm;
+
+    // Use an atomic wait to artificially block the current thread with the
+    // given timeout. Note that we should never be notified (return value
+    // of 0) and our comparison should never fail (return value of 1), so we
+    // should always resume execution through a timeout (return value of
+    // 2).
+    let mut nanos = dur.as_nanos();
+    while nanos > 0 {
+        let amt = cmp::min(i64::MAX as u128, nanos);
+        let mut x = 0;
+        let val = unsafe { wasm::memory_atomic_wait32(&mut x, 0, amt as i64) };
+        debug_assert_eq!(val, 2);
+        nanos -= amt;
+    }
+}
diff --git a/library/std/src/sys/thread/windows.rs b/library/std/src/sys/thread/windows.rs
new file mode 100644
index 00000000000..a5640c51c4a
--- /dev/null
+++ b/library/std/src/sys/thread/windows.rs
@@ -0,0 +1,140 @@
+use core::ffi::c_void;
+
+use crate::ffi::CStr;
+use crate::num::NonZero;
+use crate::os::windows::io::{AsRawHandle, HandleOrNull};
+use crate::sys::handle::Handle;
+use crate::sys::pal::time::WaitableTimer;
+use crate::sys::pal::{dur2timeout, to_u16s};
+use crate::sys::{c, stack_overflow};
+use crate::sys_common::FromInner;
+use crate::time::Duration;
+use crate::{io, ptr};
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
+
+pub struct Thread {
+    handle: Handle,
+}
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let p = Box::into_raw(Box::new(p));
+
+        // CreateThread rounds the stack size up to the nearest page size (at least 4 KiB).
+        // If a value of zero is given, the default stack size is used instead.
+        // SAFETY: `thread_start` has the right ABI for a thread's entry point.
+        // `p` is simply passed through to the new thread without being touched.
+        let ret = unsafe {
+            let ret = c::CreateThread(
+                ptr::null_mut(),
+                stack,
+                Some(thread_start),
+                p as *mut _,
+                c::STACK_SIZE_PARAM_IS_A_RESERVATION,
+                ptr::null_mut(),
+            );
+            HandleOrNull::from_raw_handle(ret)
+        };
+        return if let Ok(handle) = ret.try_into() {
+            Ok(Thread { handle: Handle::from_inner(handle) })
+        } else {
+            // The thread failed to start and as a result p was not consumed. Therefore, it is
+            // safe to reconstruct the box so that it gets deallocated.
+            unsafe { drop(Box::from_raw(p)) };
+            Err(io::Error::last_os_error())
+        };
+
+        unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 {
+            // Next, reserve some stack space in case we otherwise run out of stack.
+            stack_overflow::reserve_stack();
+            // Finally, let's run some code.
+            // SAFETY: We are simply recreating the box that was leaked earlier.
+            // It's the responsibility of whoever calls `Thread::new` to ensure this is safe to call here.
+            unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)() };
+            0
+        }
+    }
+
+    pub fn join(self) {
+        let rc = unsafe { c::WaitForSingleObject(self.handle.as_raw_handle(), c::INFINITE) };
+        if rc == c::WAIT_FAILED {
+            panic!("failed to join on thread: {}", io::Error::last_os_error());
+        }
+    }
+
+    pub fn handle(&self) -> &Handle {
+        &self.handle
+    }
+
+    pub fn into_handle(self) -> Handle {
+        self.handle
+    }
+}
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
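+    // Note that `dwNumberOfProcessors` only reports the logical processors
+    // in the current processor group.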
+    let res = unsafe {
+        let mut sysinfo: c::SYSTEM_INFO = crate::mem::zeroed();
+        c::GetSystemInfo(&mut sysinfo);
+        sysinfo.dwNumberOfProcessors as usize
+    };
+    match res {
+        0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
+        cpus => Ok(unsafe { NonZero::new_unchecked(cpus) }),
+    }
+}
+
+pub fn current_os_id() -> Option<u64> {
+    // SAFETY: FFI call with no preconditions.
+    let id: u32 = unsafe { c::GetCurrentThreadId() };
+
+    // A return value of 0 indicates failed lookup.
+    if id == 0 { None } else { Some(id.into()) }
+}
+
+pub fn set_name(name: &CStr) {
+    if let Ok(utf8) = name.to_str() {
+        if let Ok(utf16) = to_u16s(utf8) {
+            unsafe {
+                // SAFETY: the vec returned by `to_u16s` ends with a zero value
+                set_name_wide(&utf16)
+            }
+        };
+    };
+}
+
+/// # Safety
+///
+/// `name` must end with a zero value
+pub unsafe fn set_name_wide(name: &[u16]) {
+    unsafe { c::SetThreadDescription(c::GetCurrentThread(), name.as_ptr()) };
+}
+
+pub fn sleep(dur: Duration) {
+    fn high_precision_sleep(dur: Duration) -> Result<(), ()> {
+        let timer = WaitableTimer::high_resolution()?;
+        timer.set(dur)?;
+        timer.wait()
+    }
+    // Attempt to use high-precision sleep (Windows 10, version 1803+).
+    // On error, fall back to the standard `Sleep` function.
+    // This also preserves the zero-duration behavior of `Sleep`.
+    if dur.is_zero() || high_precision_sleep(dur).is_err() {
+        unsafe { c::Sleep(dur2timeout(dur)) }
+    }
+}
+
+pub fn yield_now() {
+    // This function will return 0 if there are no other threads to execute,
+    // but in that case the yield was pointless anyway, so this isn't really
+    // a case that needs to be worried about.
+    unsafe {
+        c::SwitchToThread();
+    }
+}
diff --git a/library/std/src/sys/thread/xous.rs b/library/std/src/sys/thread/xous.rs
new file mode 100644
index 00000000000..133e15a0928
--- /dev/null
+++ b/library/std/src/sys/thread/xous.rs
@@ -0,0 +1,137 @@
+use core::arch::asm;
+
+use crate::io;
+use crate::num::NonZero;
+use crate::os::xous::ffi::{
+    MemoryFlags, Syscall, ThreadId, blocking_scalar, create_thread, do_yield, join_thread,
+    map_memory, update_memory_flags,
+};
+use crate::os::xous::services::{TicktimerScalar, ticktimer_server};
+use crate::time::Duration;
+
+pub struct Thread {
+    tid: ThreadId,
+}
+
+pub const DEFAULT_MIN_STACK_SIZE: usize = 131072;
+const MIN_STACK_SIZE: usize = 4096;
+pub const GUARD_PAGE_SIZE: usize = 4096;
+
+impl Thread {
+    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
+    pub unsafe fn new(
+        stack: usize,
+        _name: Option<&str>,
+        p: Box<dyn FnOnce()>,
+    ) -> io::Result<Thread> {
+        let p = Box::into_raw(Box::new(p));
+        let mut stack_size = crate::cmp::max(stack, MIN_STACK_SIZE);
+
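+        // Round the stack size up to a whole number of 4 KiB pages.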
+        if (stack_size & 4095) != 0 {
+            stack_size = (stack_size + 4095) & !4095;
+        }
+
+        // Allocate the whole thing, then divide it up after the fact. This ensures that
+        // even if there's a context switch during this function, the whole stack plus
+        // guard pages will remain contiguous.
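+        //
+        // Layout: [ guard page | stack (stack_size bytes) | guard page ]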
+        let stack_plus_guard_pages: &mut [u8] = unsafe {
+            map_memory(
+                None,
+                None,
+                GUARD_PAGE_SIZE + stack_size + GUARD_PAGE_SIZE,
+                MemoryFlags::R | MemoryFlags::W | MemoryFlags::X,
+            )
+        }
+        .map_err(|code| io::Error::from_raw_os_error(code as i32))?;
+
+        // No access to this page. Note: Write-only pages are illegal, and will
+        // cause an access violation.
+        unsafe {
+            update_memory_flags(&mut stack_plus_guard_pages[0..GUARD_PAGE_SIZE], MemoryFlags::W)
+                .map_err(|code| io::Error::from_raw_os_error(code as i32))?
+        };
+
+        // No access to this page. Note: Write-only pages are illegal, and will
+        // cause an access violation.
+        unsafe {
+            update_memory_flags(
+                &mut stack_plus_guard_pages[(GUARD_PAGE_SIZE + stack_size)..],
+                MemoryFlags::W,
+            )
+            .map_err(|code| io::Error::from_raw_os_error(code as i32))?
+        };
+
+        let guard_page_pre = stack_plus_guard_pages.as_ptr() as usize;
+        let tid = create_thread(
+            thread_start as *mut usize,
+            &mut stack_plus_guard_pages[GUARD_PAGE_SIZE..(stack_size + GUARD_PAGE_SIZE)],
+            p as usize,
+            guard_page_pre,
+            stack_size,
+            0,
+        )
+        .map_err(|code| io::Error::from_raw_os_error(code as i32))?;
+
+        extern "C" fn thread_start(
+            main: *mut usize,
+            guard_page_pre: usize,
+            stack_size: usize,
+        ) -> ! {
+            unsafe {
+                // Run the contents of the new thread.
+                Box::from_raw(main as *mut Box<dyn FnOnce()>)();
+            }
+
+            // Destroy TLS, which will free the TLS page and call the destructor for
+            // any thread local storage (if any).
+            unsafe {
+                crate::sys::thread_local::key::destroy_tls();
+            }
+
+            // Deallocate the stack memory, along with the guard pages. Afterwards,
+            // exit the thread by returning to the magic address 0xff80_3000usize,
+            // which tells the kernel to deallocate this thread.
+            let mapped_memory_base = guard_page_pre;
+            let mapped_memory_length = GUARD_PAGE_SIZE + stack_size + GUARD_PAGE_SIZE;
+            unsafe {
+                asm!(
+                    "ecall",
+                    "ret",
+                    in("a0") Syscall::UnmapMemory as usize,
+                    in("a1") mapped_memory_base,
+                    in("a2") mapped_memory_length,
+                    in("ra") 0xff80_3000usize,
+                    options(nomem, nostack, noreturn)
+                );
+            }
+        }
+
+        Ok(Thread { tid })
+    }
+
+    pub fn join(self) {
+        join_thread(self.tid).unwrap();
+    }
+}
+
+pub fn available_parallelism() -> io::Result<NonZero<usize>> {
+    // We're unicore right now.
+    Ok(unsafe { NonZero::new_unchecked(1) })
+}
+
+pub fn yield_now() {
+    do_yield();
+}
+
+pub fn sleep(dur: Duration) {
+    // Because the sleep server works in units of `usize` milliseconds, split
+    // the request up into chunks of that size. This means we may run into
+    // issues when trying to sleep a thread for more than 49 days on a 32-bit system.
+    let mut millis = dur.as_millis();
+    while millis > 0 {
+        let sleep_duration = if millis > (usize::MAX as _) { usize::MAX } else { millis as usize };
+        blocking_scalar(ticktimer_server(), TicktimerScalar::SleepMs(sleep_duration).into())
+            .expect("failed to send message to ticktimer server");
+        millis -= sleep_duration as u128;
+    }
+}