diff options
| author | Jacob Pratt <jacob@jhpratt.dev> | 2024-03-11 03:47:22 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-11 03:47:22 -0400 |
| commit | 5a3d6c91b1892806b0cdda8b03e5ceef64a1989f (patch) | |
| tree | 85a0734b57a6cee565469eb1b85c73b65beca727 | |
| parent | 6f158164baefabb21575b85ab949b488492c0c3a (diff) | |
| parent | 50760aa2b59b54fba474283e5c5c4e028ce4d9af (diff) | |
| download | rust-5a3d6c91b1892806b0cdda8b03e5ceef64a1989f.tar.gz rust-5a3d6c91b1892806b0cdda8b03e5ceef64a1989f.zip | |
Rollup merge of #122326 - Zoxc:win-alloc-tweak, r=ChrisDenton
Optimize `process_heap_alloc` This optimizes `process_heap_alloc` introduced in https://github.com/rust-lang/rust/pull/120205. From: ``` .text:0000000180027ED0 ; std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93 .text:0000000180027ED0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E .text:0000000180027ED0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E proc near .text:0000000180027ED0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p .text:0000000180027ED0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+55↑p ... .text:0000000180027ED0 push rsi .text:0000000180027ED1 push rdi .text:0000000180027ED2 sub rsp, 28h .text:0000000180027ED6 mov rsi, rdx .text:0000000180027ED9 mov edi, ecx .text:0000000180027EDB mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EE2 test rcx, rcx .text:0000000180027EE5 jnz short loc_180027EFC .text:0000000180027EE7 call cs:__imp_GetProcessHeap .text:0000000180027EED test rax, rax .text:0000000180027EF0 jz short loc_180027F0E .text:0000000180027EF2 mov rcx, rax .text:0000000180027EF5 mov cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E, rax ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EFC .text:0000000180027EFC loc_180027EFC: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+15↑j .text:0000000180027EFC mov edx, edi .text:0000000180027EFE mov r8, rsi .text:0000000180027F01 add rsp, 28h .text:0000000180027F05 pop rdi .text:0000000180027F06 pop rsi .text:0000000180027F07 jmp cs:__imp_HeapAlloc .text:0000000180027F0E ; --------------------------------------------------------------------------- .text:0000000180027F0E .text:0000000180027F0E loc_180027F0E: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+20↑j .text:0000000180027F0E xor eax, eax .text:0000000180027F10 add rsp, 28h .text:0000000180027F14 pop rdi .text:0000000180027F15 pop rsi .text:0000000180027F16 retn .text:0000000180027F16 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E endp ``` to ``` .text:0000000180027EE0 ; std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16 .text:0000000180027EE0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E .text:0000000180027EE0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E proc near .text:0000000180027EE0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p .text:0000000180027EE0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+54↑p ... .text:0000000180027EE0 mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EE7 test rcx, rcx .text:0000000180027EEA jz short loc_180027EF3 .text:0000000180027EEC jmp cs:__imp_HeapAlloc .text:0000000180027EF3 ; --------------------------------------------------------------------------- .text:0000000180027EF3 .text:0000000180027EF3 loc_180027EF3: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16+A↑j .text:0000000180027EF3 mov ecx, edx .text:0000000180027EF5 mov rdx, r8 .text:0000000180027EF8 jmp std__sys__pal__windows__alloc__process_heap_init_and_alloc .text:0000000180027EF8 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E endp ``` r? `@ChrisDenton`
| -rw-r--r-- | library/std/src/sys/pal/windows/alloc.rs | 55 |
1 files changed, 36 insertions, 19 deletions
diff --git a/library/std/src/sys/pal/windows/alloc.rs b/library/std/src/sys/pal/windows/alloc.rs index 270eca37b14..681d1a5efe9 100644 --- a/library/std/src/sys/pal/windows/alloc.rs +++ b/library/std/src/sys/pal/windows/alloc.rs @@ -6,6 +6,7 @@ use crate::ptr; use crate::sync::atomic::{AtomicPtr, Ordering}; use crate::sys::c; use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN}; +use core::mem::MaybeUninit; #[cfg(test)] mod tests; @@ -94,29 +95,30 @@ static HEAP: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut()); // a non-null handle returned by `GetProcessHeap`. #[inline] fn init_or_get_process_heap() -> c::HANDLE { - let heap = HEAP.load(Ordering::Relaxed); - if core::intrinsics::unlikely(heap.is_null()) { - // `HEAP` has not yet been successfully initialized - let heap = unsafe { GetProcessHeap() }; - if !heap.is_null() { - // SAFETY: No locking is needed because within the same process, - // successful calls to `GetProcessHeap` will always return the same value, even on different threads. - HEAP.store(heap, Ordering::Release); - - // SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap` - heap - } else { - // Could not get the current process heap. - ptr::null_mut() - } - } else { + // `HEAP` has not yet been successfully initialized + let heap = unsafe { GetProcessHeap() }; + if !heap.is_null() { + // SAFETY: No locking is needed because within the same process, + // successful calls to `GetProcessHeap` will always return the same value, even on different threads. + HEAP.store(heap, Ordering::Release); + // SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap` heap + } else { + // Could not get the current process heap. + ptr::null_mut() } } +/// This is outlined from `process_heap_alloc` so that `process_heap_alloc` +/// does not need any stack allocations. #[inline(never)] -fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID { +#[cold] +extern "C" fn process_heap_init_and_alloc( + _heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc` + flags: c::DWORD, + dwBytes: c::SIZE_T, +) -> c::LPVOID { let heap = init_or_get_process_heap(); if core::intrinsics::unlikely(heap.is_null()) { return ptr::null_mut(); @@ -125,6 +127,21 @@ fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID { unsafe { HeapAlloc(heap, flags, dwBytes) } } +#[inline(never)] +fn process_heap_alloc( + _heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`, + flags: c::DWORD, + dwBytes: c::SIZE_T, +) -> c::LPVOID { + let heap = HEAP.load(Ordering::Relaxed); + if core::intrinsics::likely(!heap.is_null()) { + // SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`. + unsafe { HeapAlloc(heap, flags, dwBytes) } + } else { + process_heap_init_and_alloc(MaybeUninit::uninit(), flags, dwBytes) + } +} + // Get a non-null handle to the default heap of the current process. // SAFETY: `HEAP` must have been successfully initialized. #[inline] @@ -148,12 +165,12 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 { if layout.align() <= MIN_ALIGN { // The returned pointer points to the start of an allocated block. - process_heap_alloc(flags, layout.size()) as *mut u8 + process_heap_alloc(MaybeUninit::uninit(), flags, layout.size()) as *mut u8 } else { // Allocate extra padding in order to be able to satisfy the alignment. let total = layout.align() + layout.size(); - let ptr = process_heap_alloc(flags, total) as *mut u8; + let ptr = process_heap_alloc(MaybeUninit::uninit(), flags, total) as *mut u8; if ptr.is_null() { // Allocation has failed. return ptr::null_mut(); |
