diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2013-10-17 01:40:33 -0700 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2013-10-19 09:43:31 -0700 |
| commit | 6d8330afb6c925d1092f27919f61d4ce6a3fb1d4 (patch) | |
| tree | 72a6ff826ffcd13afe10a79cc4a4a78384cf2f16 /src/libstd/rt | |
| parent | d773a024a2976f2759235551a52101cd08b37cce (diff) | |
| download | rust-6d8330afb6c925d1092f27919f61d4ce6a3fb1d4.tar.gz rust-6d8330afb6c925d1092f27919f61d4ce6a3fb1d4.zip | |
Use __morestack to detect stack overflow
This commit resumes management of the stack boundaries and limits when switching between tasks. This additionally leverages the __morestack function to run code on "stack overflow". The current behavior is to abort the process, but this is probably not the best behavior in the long term (for details, see the comment I wrote up in the stack exhaustion routine).
Diffstat (limited to 'src/libstd/rt')
| -rw-r--r-- | src/libstd/rt/context.rs | 283 | ||||
| -rw-r--r-- | src/libstd/rt/crate_map.rs | 2 | ||||
| -rw-r--r-- | src/libstd/rt/env.rs | 2 | ||||
| -rw-r--r-- | src/libstd/rt/sched.rs | 2 | ||||
| -rw-r--r-- | src/libstd/rt/task.rs | 102 | ||||
| -rw-r--r-- | src/libstd/rt/thread.rs | 43 | ||||
| -rw-r--r-- | src/libstd/rt/thread_local_storage.rs | 45 |
7 files changed, 412 insertions, 67 deletions
diff --git a/src/libstd/rt/context.rs b/src/libstd/rt/context.rs index 853cc08a0ba..f4616a8e183 100644 --- a/src/libstd/rt/context.rs +++ b/src/libstd/rt/context.rs @@ -11,9 +11,12 @@ use option::*; use super::stack::StackSegment; use libc::c_void; +use uint; use cast::{transmute, transmute_mut_unsafe, transmute_region, transmute_mut_region}; +pub static RED_ZONE: uint = 20 * 1024; + // FIXME #7761: Registers is boxed so that it is 16-byte aligned, for storing // SSE regs. It would be marginally better not to do this. In C++ we // use an attribute on a struct. @@ -24,14 +27,17 @@ pub struct Context { /// The context entry point, saved here for later destruction start: Option<~~fn()>, /// Hold the registers while the task or scheduler is suspended - regs: ~Registers + regs: ~Registers, + /// Lower bound and upper bound for the stack + stack_bounds: Option<(uint, uint)>, } impl Context { pub fn empty() -> Context { Context { start: None, - regs: new_regs() + regs: new_regs(), + stack_bounds: None, } } @@ -47,7 +53,6 @@ impl Context { let fp: *c_void = task_start_wrapper as *c_void; let argp: *c_void = unsafe { transmute::<&~fn(), *c_void>(&*start) }; - let stack_base: *uint = stack.start(); let sp: *uint = stack.end(); let sp: *mut uint = unsafe { transmute_mut_unsafe(sp) }; // Save and then immediately load the current context, @@ -57,11 +62,23 @@ impl Context { swap_registers(transmute_mut_region(&mut *regs), transmute_region(&*regs)); }; - initialize_call_frame(&mut *regs, fp, argp, sp, stack_base); + initialize_call_frame(&mut *regs, fp, argp, sp); + // Scheduler tasks don't have a stack in the "we allocated it" sense, + // but rather they run on pthreads stacks. We have complete control over + // them in terms of the code running on them (and hopefully they don't + // overflow). Additionally, their coroutine stacks are listed as being + // zero-length, so that's how we detect what's what here. 
+ let stack_base: *uint = stack.start(); + let bounds = if sp as uint == stack_base as uint { + None + } else { + Some((stack_base as uint, sp as uint)) + }; return Context { start: Some(start), - regs: regs + regs: regs, + stack_bounds: bounds, } } @@ -79,8 +96,25 @@ impl Context { let in_regs: &Registers = match in_context { &Context { regs: ~ref r, _ } => r }; - rtdebug!("doing raw swap"); - unsafe { swap_registers(out_regs, in_regs) }; + + rtdebug!("noting the stack limit and doing raw swap"); + + unsafe { + // Right before we switch to the new context, set the new context's + // stack limit in the OS-specified TLS slot. This also means that + // we cannot call any more rust functions after record_stack_bounds + // returns because they would all likely fail due to the limit being + // invalid for the current task. Lucky for us `swap_registers` is a + // C function so we don't have to worry about that! + match in_context.stack_bounds { + Some((lo, hi)) => record_stack_bounds(lo, hi), + // If we're going back to one of the original contexts or + // something that's possibly not a "normal task", then reset + // the stack limit to 0 to make morestack never fail + None => record_stack_bounds(0, uint::max_value), + } + swap_registers(out_regs, in_regs) + } } } @@ -89,6 +123,29 @@ extern { fn swap_registers(out_regs: *mut Registers, in_regs: *Registers); } +// Register contexts used in various architectures +// +// These structures all represent a context of one task throughout its +// execution. Each struct is a representation of the architecture's register +// set. When swapping between tasks, these register sets are used to save off +// the current registers into one struct, and load them all from another. +// +// Note that this is only used for context switching, which means that some of +// the registers may go unused. For example, for architectures with +// callee/caller saved registers, the context will only reflect the callee-saved +// registers. 
This is because the caller saved registers are already stored +// elsewhere on the stack (if it was necessary anyway). +// +// Additionally, there may be fields on various architectures which are unused +// entirely because they only reflect what is theoretically possible for a +// "complete register set" to show, but user-space cannot alter these registers. +// An example of this would be the segment selectors for x86. +// +// These structures/functions are roughly in-sync with the source files inside +// of src/rt/arch/$arch. The only currently used function from those folders is +// the `swap_registers` function, but that's only because for now segmented +// stacks are disabled. + #[cfg(target_arch = "x86")] struct Registers { eax: u32, ebx: u32, ecx: u32, edx: u32, @@ -109,7 +166,7 @@ fn new_regs() -> ~Registers { #[cfg(target_arch = "x86")] fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, - sp: *mut uint, _stack_base: *uint) { + sp: *mut uint) { let sp = align_down(sp); let sp = mut_offset(sp, -4); @@ -125,6 +182,8 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, regs.ebp = 0; } +// windows requires saving more registers (both general and XMM), so the windows +// register context must be larger. #[cfg(windows, target_arch = "x86_64")] type Registers = [uint, ..34]; #[cfg(not(windows), target_arch = "x86_64")] @@ -137,29 +196,14 @@ fn new_regs() -> ~Registers { ~([0, .. 
22]) } #[cfg(target_arch = "x86_64")] fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, - sp: *mut uint, stack_base: *uint) { + sp: *mut uint) { - // Redefinitions from regs.h + // Redefinitions from rt/arch/x86_64/regs.h static RUSTRT_ARG0: uint = 3; static RUSTRT_RSP: uint = 1; static RUSTRT_IP: uint = 8; static RUSTRT_RBP: uint = 2; - #[cfg(windows)] - fn initialize_tib(regs: &mut Registers, sp: *mut uint, stack_base: *uint) { - // Redefinitions from regs.h - static RUSTRT_ST1: uint = 11; // stack bottom - static RUSTRT_ST2: uint = 12; // stack top - regs[RUSTRT_ST1] = sp as uint; - regs[RUSTRT_ST2] = stack_base as uint; - } - #[cfg(not(windows))] - fn initialize_tib(_: &mut Registers, _: *mut uint, _: *uint) { - } - - // Win64 manages stack range at TIB: %gs:0x08 (top) and %gs:0x10 (bottom) - initialize_tib(regs, sp, stack_base); - let sp = align_down(sp); let sp = mut_offset(sp, -1); @@ -167,9 +211,9 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, unsafe { *sp = 0; } rtdebug!("creating call frame"); - rtdebug!("fptr {}", fptr as uint); - rtdebug!("arg {}", arg as uint); - rtdebug!("sp {}", sp as uint); + rtdebug!("fptr {}", fptr); + rtdebug!("arg {}", arg); + rtdebug!("sp {}", sp); regs[RUSTRT_ARG0] = arg as uint; regs[RUSTRT_RSP] = sp as uint; @@ -187,7 +231,7 @@ fn new_regs() -> ~Registers { ~([0, .. 32]) } #[cfg(target_arch = "arm")] fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, - sp: *mut uint, _stack_base: *uint) { + sp: *mut uint) { let sp = align_down(sp); // sp of arm eabi is 8-byte aligned let sp = mut_offset(sp, -2); @@ -208,7 +252,7 @@ fn new_regs() -> ~Registers { ~([0, .. 
32]) } #[cfg(target_arch = "mips")] fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void, - sp: *mut uint, _stack_base: *uint) { + sp: *mut uint) { let sp = align_down(sp); // sp of mips o32 is 8-byte aligned let sp = mut_offset(sp, -2); @@ -236,3 +280,182 @@ pub fn mut_offset<T>(ptr: *mut T, count: int) -> *mut T { use std::sys::size_of; (ptr as int + count * (size_of::<T>() as int)) as *mut T } + +#[inline(always)] +pub unsafe fn record_stack_bounds(stack_lo: uint, stack_hi: uint) { + // When the old runtime had segmented stacks, it used a calculation that was + // "limit + RED_ZONE + FUDGE". The red zone was for things like dynamic + // symbol resolution, llvm function calls, etc. In theory this red zone + // value is 0, but it matters far less when we have gigantic stacks because + // we don't need to be so exact about our stack budget. The "fudge factor" + // was because LLVM doesn't emit a stack check for functions < 256 bytes in + // size. Again though, we have giant stacks, so we round all these + // calculations up to the nice round number of 20k. + record_sp_limit(stack_lo + RED_ZONE); + + return target_record_stack_bounds(stack_lo, stack_hi); + + #[cfg(not(windows))] #[cfg(not(target_arch = "x86_64"))] #[inline(always)] + unsafe fn target_record_stack_bounds(_stack_lo: uint, _stack_hi: uint) {} + #[cfg(windows, target_arch = "x86_64")] #[inline(always)] + unsafe fn target_record_stack_bounds(stack_lo: uint, stack_hi: uint) { + // Windows compiles C functions which may check the stack bounds. This + // means that if we want to perform valid FFI on windows, then we need + // to ensure that the stack bounds are what they truly are for this + // task. 
More info can be found at: + // https://github.com/mozilla/rust/issues/3445#issuecomment-26114839 + // + // stack range is at TIB: %gs:0x08 (top) and %gs:0x10 (bottom) + asm!("mov $0, %gs:0x08" :: "r"(stack_lo) :: "volatile"); + asm!("mov $0, %gs:0x10" :: "r"(stack_hi) :: "volatile"); + } +} + +/// Records the current limit of the stack as specified by `end`. +/// +/// This is stored in an OS-dependent location, likely inside of the thread +/// local storage. The location that the limit is stored is a pre-ordained +/// location because it's where LLVM has emitted code to check. +/// +/// Note that this cannot be called under normal circumstances. This function is +/// changing the stack limit, so upon returning any further function calls will +/// possibly be triggering the morestack logic if you're not careful. +/// +/// Also note that this and all of the inside functions are all flagged as +/// "inline(always)" because they're messing around with the stack limits. This +/// would be unfortunate for the functions themselves to trigger a morestack +/// invocation (if they were an actual function call). 
+#[inline(always)] +pub unsafe fn record_sp_limit(limit: uint) { + return target_record_sp_limit(limit); + + // x86-64 + #[cfg(target_arch = "x86_64", target_os = "macos")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + asm!("movq $$0x60+90*8, %rsi + movq $0, %gs:(%rsi)" :: "r"(limit) : "rsi" : "volatile") + } + #[cfg(target_arch = "x86_64", target_os = "linux")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + asm!("movq $0, %fs:112" :: "r"(limit) :: "volatile") + } + #[cfg(target_arch = "x86_64", target_os = "win32")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + // see: http://en.wikipedia.org/wiki/Win32_Thread_Information_Block + // store this inside of the "arbitrary data slot", but double the size + // because this is 64 bit instead of 32 bit + asm!("movq $0, %gs:0x28" :: "r"(limit) :: "volatile") + } + #[cfg(target_arch = "x86_64", target_os = "freebsd")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + asm!("movq $0, %fs:24" :: "r"(limit) :: "volatile") + } + + // x86 + #[cfg(target_arch = "x86", target_os = "macos")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + asm!("movl $$0x48+90*4, %eax + movl $0, %gs:(%eax)" :: "r"(limit) : "eax" : "volatile") + } + #[cfg(target_arch = "x86", target_os = "linux")] + #[cfg(target_arch = "x86", target_os = "freebsd")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + asm!("movl $0, %gs:48" :: "r"(limit) :: "volatile") + } + #[cfg(target_arch = "x86", target_os = "win32")] #[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + // see: http://en.wikipedia.org/wiki/Win32_Thread_Information_Block + // store this inside of the "arbitrary data slot" + asm!("movl $0, %fs:0x14" :: "r"(limit) :: "volatile") + } + + // mips, arm - Some brave soul can port these to inline asm, but it's over + // my head personally + #[cfg(target_arch = "mips")] + #[cfg(target_arch = "arm")] 
#[inline(always)] + unsafe fn target_record_sp_limit(limit: uint) { + return record_sp_limit(limit as *c_void); + extern { + #[rust_stack] + fn record_sp_limit(limit: *c_void); + } + } +} + +/// The counterpart of the function above, this function will fetch the current +/// stack limit stored in TLS. +/// +/// Note that all of these functions are meant to be exact counterparts of their +/// brethren above, except that the operands are reversed. +/// +/// As with the setter, this function does not have a __morestack header and can +/// therefore be called in a "we're out of stack" situation. +#[inline(always)] +// NOTE: after the next snapshot, can remove the initialization before inline +// assembly due to an improvement in how it's handled, then this specific +// allow directive should get removed. +#[allow(dead_assignment)] +pub unsafe fn get_sp_limit() -> uint { + return target_get_sp_limit(); + + // x86-64 + #[cfg(target_arch = "x86_64", target_os = "macos")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movq $$0x60+90*8, %rsi + movq %gs:(%rsi), $0" : "=r"(limit) :: "rsi" : "volatile"); + return limit; + } + #[cfg(target_arch = "x86_64", target_os = "linux")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movq %fs:112, $0" : "=r"(limit) ::: "volatile"); + return limit; + } + #[cfg(target_arch = "x86_64", target_os = "win32")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movq %gs:0x28, $0" : "=r"(limit) ::: "volatile"); + return limit; + } + #[cfg(target_arch = "x86_64", target_os = "freebsd")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movq %fs:24, $0" : "=r"(limit) ::: "volatile"); + return limit; + } + + // x86 + #[cfg(target_arch = "x86", target_os = "macos")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + 
asm!("movl $$0x48+90*4, %eax + movl %gs:(%eax), $0" : "=r"(limit) :: "eax" : "volatile"); + return limit; + } + #[cfg(target_arch = "x86", target_os = "linux")] + #[cfg(target_arch = "x86", target_os = "freebsd")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movl %gs:48, $0" : "=r"(limit) ::: "volatile"); + return limit; + } + #[cfg(target_arch = "x86", target_os = "win32")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + let mut limit: uint = 0; + asm!("movl %fs:0x14, $0" : "=r"(limit) ::: "volatile"); + return limit; + } + + // mips, arm - Some brave soul can port these to inline asm, but it's over + // my head personally + #[cfg(target_arch = "mips")] + #[cfg(target_arch = "arm")] #[inline(always)] + unsafe fn target_get_sp_limit() -> uint { + return get_sp_limit() as uint; + extern { + #[rust_stack] + fn get_sp_limit() -> *c_void; + } + } +} diff --git a/src/libstd/rt/crate_map.rs b/src/libstd/rt/crate_map.rs index 8785dcca7bd..96a0069e851 100644 --- a/src/libstd/rt/crate_map.rs +++ b/src/libstd/rt/crate_map.rs @@ -17,7 +17,7 @@ use vec::ImmutableVector; // and instead look them up at runtime, which we need to resolve // the crate_map properly. #[cfg(target_os = "macos")] -#[link_args = "-undefined dynamic_lookup"] +#[link_args = "-Wl,-U,__rust_crate_map_toplevel"] extern {} pub struct ModEntry<'self> { diff --git a/src/libstd/rt/env.rs b/src/libstd/rt/env.rs index 5b840655120..c02e7fe9013 100644 --- a/src/libstd/rt/env.rs +++ b/src/libstd/rt/env.rs @@ -17,7 +17,7 @@ use os; // Note that these are all accessed without any synchronization. // They are expected to be initialized once then left alone. 
-static mut MIN_STACK: uint = 2000000; +static mut MIN_STACK: uint = 4000000; static mut DEBUG_BORROW: bool = false; pub fn init() { diff --git a/src/libstd/rt/sched.rs b/src/libstd/rt/sched.rs index 0a4622bc65e..c636a169037 100644 --- a/src/libstd/rt/sched.rs +++ b/src/libstd/rt/sched.rs @@ -173,7 +173,7 @@ impl Scheduler { // Now that we have an empty task struct for the scheduler // task, put it in TLS. - Local::put::(sched_task); + Local::put(sched_task); // Before starting our first task, make sure the idle callback // is active. As we do not start in the sleep state this is diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs index d5278975d8d..1b1e4e7d426 100644 --- a/src/libstd/rt/task.rs +++ b/src/libstd/rt/task.rs @@ -29,6 +29,7 @@ use rt::logging::StdErrLogger; use super::local_heap::LocalHeap; use rt::sched::{Scheduler, SchedHandle}; use rt::stack::{StackSegment, StackPool}; +use rt::context; use rt::context::Context; use unstable::finally::Finally; use task::spawn::Taskgroup; @@ -465,6 +466,80 @@ impl Unwinder { } } +/// This function is invoked from rust's current __morestack function. Segmented +/// stacks are currently not enabled as segmented stacks, but rather one giant +/// stack segment. This means that whenever we run out of stack, we want to +/// truly consider it to be stack overflow rather than allocating a new stack. +#[no_mangle] // - this is called from C code +#[no_split_stack] // - it would be sad for this function to trigger __morestack +#[doc(hidden)] // XXX: this function shouldn't have to be `pub` to get exported + // so it can be linked against, we should have a better way + // of specifying that. +pub extern "C" fn rust_stack_exhausted() { + use rt::in_green_task_context; + use rt::task::Task; + use rt::local::Local; + use rt::logging::Logger; + use unstable::intrinsics; + + unsafe { + // We're calling this function because the stack just ran out. 
We need + // to call some other rust functions, but if we invoke the functions + // right now it'll just trigger this handler being called again. In + // order to alleviate this, we move the stack limit to be inside of the + // red zone that was allocated for exactly this reason. + let limit = context::get_sp_limit(); + context::record_sp_limit(limit - context::RED_ZONE / 2); + + // This probably isn't the best course of action. Ideally one would want + // to unwind the stack here instead of just aborting the entire process. + // This is a tricky problem, however. There are a few things which need to + // be considered: + // + // 1. We're here because of a stack overflow, yet unwinding will run + // destructors and hence arbitrary code. What if that code overflows + // the stack? One possibility is to use the above allocation of an + // extra 10k to hope that we don't hit the limit, and if we do then + // abort the whole program. Not the best, but kind of hard to deal + // with unless we want to switch stacks. + // + // 2. LLVM will optimize functions based on whether they can unwind or + // not. It will flag functions with 'nounwind' if it believes that + // the function cannot trigger unwinding, but if we do unwind on + // stack overflow then it means that we could unwind in any function + // anywhere. We would have to make sure that LLVM only places the + // nounwind flag on functions which don't call any other functions. + // + // 3. The function that overflowed may have owned arguments. These + // arguments need to have their destructors run, but we haven't even + // begun executing the function yet, so unwinding will not run + // any landing pads for these functions. If this is ignored, then + // the arguments will just be leaked. + // + // Exactly what to do here is a very delicate topic, and is possibly + // still up in the air for what exactly to do.
Some relevant issues: + // + // #3555 - out-of-stack failure leaks arguments + // #3695 - should there be a stack limit? + // #9855 - possible strategies which could be taken + // #9854 - unwinding on windows through __morestack has never worked + // #2361 - possible implementation of not using landing pads + + if in_green_task_context() { + do Local::borrow |task: &mut Task| { + let n = task.name.as_ref().map(|n| n.as_slice()).unwrap_or("<unnamed>"); + + format_args!(|args| { task.logger.log(args) }, + "task '{}' has overflowed its stack", n); + } + } else { + rterrln!("stack overflow in non-task context"); + } + + intrinsics::abort(); + } +} + /// This is the entry point of unwinding for things like lang items and such. /// The arguments are normally generated by the compiler. pub fn begin_unwind(msg: *c_char, file: *c_char, line: size_t) -> ! { @@ -481,22 +556,33 @@ pub fn begin_unwind(msg: *c_char, file: *c_char, line: size_t) -> ! { let msg = match msg.as_str() { Some(s) => s, None => rtabort!("message wasn't utf8?") }; - let file = match file.as_str() { - Some(s) => s, None => rtabort!("message wasn't utf8?") - }; if in_green_task_context() { // Be careful not to allocate in this block, if we're failing we may // have been failing due to a lack of memory in the first place... 
do Local::borrow |task: &mut Task| { let n = task.name.as_ref().map(|n| n.as_slice()).unwrap_or("<unnamed>"); - format_args!(|args| { task.logger.log(args) }, - "task '{}' failed at '{}', {}:{}", - n, msg, file, line); + + match file.as_str() { + Some(file) => { + format_args!(|args| { task.logger.log(args) }, + "task '{}' failed at '{}', {}:{}", + n, msg, file, line); + } + None => { + format_args!(|args| { task.logger.log(args) }, + "task '{}' failed at '{}'", n, msg); + } + } } } else { - rterrln!("failed in non-task context at '{}', {}:{}", - msg, file, line as int); + match file.as_str() { + Some(file) => { + rterrln!("failed in non-task context at '{}', {}:{}", + msg, file, line as int); + } + None => rterrln!("failed in non-task context at '{}'", msg), + } } let task: *mut Task = Local::unsafe_borrow(); diff --git a/src/libstd/rt/thread.rs b/src/libstd/rt/thread.rs index 8b64fda2136..e774b81da35 100644 --- a/src/libstd/rt/thread.rs +++ b/src/libstd/rt/thread.rs @@ -8,8 +8,11 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use cast; use libc; use ops::Drop; +use unstable::raw; +use uint; #[allow(non_camel_case_types)] // runtime type type raw_thread = libc::c_void; @@ -17,21 +20,38 @@ type raw_thread = libc::c_void; pub struct Thread { main: ~fn(), raw_thread: *raw_thread, - joined: bool + joined: bool, } impl Thread { + #[fixed_stack_segment] #[inline(never)] pub fn start(main: ~fn()) -> Thread { - fn substart(main: &~fn()) -> *raw_thread { - #[fixed_stack_segment]; #[inline(never)]; - - unsafe { rust_raw_thread_start(main) } + // This is the starting point of rust os threads. The first thing we do + // is make sure that we don't trigger __morestack (also why this has a + // no_split_stack annotation), and then we re-build the main function + // and invoke it from there. 
+ #[no_split_stack] + extern "C" fn thread_start(code: *(), env: *()) { + use rt::context; + unsafe { + context::record_stack_bounds(0, uint::max_value); + let f: &fn() = cast::transmute(raw::Closure { + code: code, + env: env, + }); + f(); + } } - let raw = substart(&main); + + let raw_thread = unsafe { + let c: raw::Closure = cast::transmute_copy(&main); + let raw::Closure { code, env } = c; + rust_raw_thread_start(thread_start, code, env) + }; Thread { main: main, - raw_thread: raw, - joined: false + raw_thread: raw_thread, + joined: false, } } @@ -55,7 +75,8 @@ impl Drop for Thread { } extern { - pub fn rust_raw_thread_start(f: &(~fn())) -> *raw_thread; - pub fn rust_raw_thread_join(thread: *raw_thread); - pub fn rust_raw_thread_delete(thread: *raw_thread); + fn rust_raw_thread_start(f: extern "C" fn(*(), *()), + code: *(), env: *()) -> *raw_thread; + fn rust_raw_thread_join(thread: *raw_thread); + fn rust_raw_thread_delete(thread: *raw_thread); } diff --git a/src/libstd/rt/thread_local_storage.rs b/src/libstd/rt/thread_local_storage.rs index cd89d09ffc0..ddb104240f2 100644 --- a/src/libstd/rt/thread_local_storage.rs +++ b/src/libstd/rt/thread_local_storage.rs @@ -27,15 +27,11 @@ pub unsafe fn create(key: &mut Key) { } #[cfg(unix)] -#[fixed_stack_segment] -#[inline(never)] pub unsafe fn set(key: Key, value: *mut c_void) { assert_eq!(0, pthread_setspecific(key, value)); } #[cfg(unix)] -#[fixed_stack_segment] -#[inline(never)] pub unsafe fn get(key: Key) -> *mut c_void { pthread_getspecific(key) } @@ -53,8 +49,21 @@ type pthread_key_t = ::libc::c_uint; #[cfg(unix)] extern { fn pthread_key_create(key: *mut pthread_key_t, dtor: *u8) -> c_int; - fn pthread_setspecific(key: pthread_key_t, value: *mut c_void) -> c_int; + + // This function is a very cheap operation on both osx and unix. On osx, it + // turns out it's just three instructions, and on unix it's a cheap function + // which only uses a very small amount of stack. 
+ // + // This is not marked as such because we think it has a small stack, but + // rather we would like to be able to fetch information from + // thread-local-storage when a task is running very low on its stack budget. + // For example, this is invoked whenever stack overflow is detected, and we + // obviously have very little budget to deal with (certainly not anything + // close to a fixed_stack_segment) + #[rust_stack] fn pthread_getspecific(key: pthread_key_t) -> *mut c_void; + #[rust_stack] + fn pthread_setspecific(key: pthread_key_t, value: *mut c_void) -> c_int; } #[cfg(windows)] @@ -70,31 +79,37 @@ pub unsafe fn create(key: &mut Key) { } #[cfg(windows)] -#[fixed_stack_segment] -#[inline(never)] pub unsafe fn set(key: Key, value: *mut c_void) { assert!(0 != TlsSetValue(key, value)) } #[cfg(windows)] -#[fixed_stack_segment] -#[inline(never)] pub unsafe fn get(key: Key) -> *mut c_void { TlsGetValue(key) } #[cfg(windows, target_arch = "x86")] extern "stdcall" { - fn TlsAlloc() -> DWORD; - fn TlsSetValue(dwTlsIndex: DWORD, lpTlsvalue: LPVOID) -> BOOL; - fn TlsGetValue(dwTlsIndex: DWORD) -> LPVOID; + fn TlsAlloc() -> DWORD; + + // See the reasoning in pthread_getspecific as to why this has the + // 'rust_stack' attribute, as this function was also verified to only + // require a small amount of stack. + #[rust_stack] + fn TlsGetValue(dwTlsIndex: DWORD) -> LPVOID; + #[rust_stack] + fn TlsSetValue(dwTlsIndex: DWORD, lpTlsvalue: LPVOID) -> BOOL; } #[cfg(windows, target_arch = "x86_64")] extern { - fn TlsAlloc() -> DWORD; - fn TlsSetValue(dwTlsIndex: DWORD, lpTlsvalue: LPVOID) -> BOOL; - fn TlsGetValue(dwTlsIndex: DWORD) -> LPVOID; + fn TlsAlloc() -> DWORD; + + // See above. + #[rust_stack] + fn TlsGetValue(dwTlsIndex: DWORD) -> LPVOID; + #[rust_stack] + fn TlsSetValue(dwTlsIndex: DWORD, lpTlsvalue: LPVOID) -> BOOL; } #[test] |
