about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2014-02-13 20:36:55 -0800
committerbors <bors@rust-lang.org>2014-02-13 20:36:55 -0800
commit22c34f3c4cddea33b916eb92f8d7286b02b865a7 (patch)
treedd36da3994499e5bac9aba767edc126f36d54f67
parent68129d299b54806b6aa4ec9f3a0755854db7b491 (diff)
parent301ff0c2df3d26a5b287ab61d80f5ca7845e827b (diff)
downloadrust-22c34f3c4cddea33b916eb92f8d7286b02b865a7.tar.gz
rust-22c34f3c4cddea33b916eb92f8d7286b02b865a7.zip
auto merge of #12172 : alexcrichton/rust/green-improvements, r=brson
These commits pick off some low-hanging fruit which were slowing down spawning green threads. The major speedup comes from fixing a bug in stack caching where we never used any cached stacks!

The program I used to benchmark is at the end. It was compiled with `rustc --opt-level=3 bench.rs --test` and run as `RUST_THREADS=1 ./bench --bench`. I chose to use `RUST_THREADS=1` due to #11730 as the profiles I was getting interfered too much when all the schedulers were in play (and shouldn't be after #11730 is fixed). All of the units below are in ns/iter as reported by `--bench` (lower is better).

|               | green | native | raw    |
| ------------- | ----- | ------ | ------ |
| osx before    | 12699 | 24030  | 19734  |
| linux before  | 10223 | 125983 | 122647 |
| osx after     |  3847 | 25771  | 20835  |
| linux after   |  2631 | 135398 | 122765 |

Note that this is *not* a benchmark of spawning green tasks vs native tasks. I put in the native numbers just to get a ballpark of where green tasks are. This benchmark is *clearly* benefiting from stack caching. Also, OSX is clearly not 5x faster than linux; I think my VM is just much slower.

All in all, this ended up being a nice 4x speedup for spawning a green task when you're using a cached stack.

```rust
extern mod extra;
extern mod native;
use std::rt::thread::Thread;

/// Benchmark behind the `green` column: each iteration spawns one task with
/// the default `spawn` and waits for that task to report back.
///
/// NOTE(review): presumably the benchmark binary runs on the green runtime so
/// that plain `spawn` creates a green task -- confirm against the test harness.
#[bench]
fn green(bh: &mut extra::test::BenchHarness) {
    // One port/channel pair shared by every iteration; the channel half is
    // cloned per task below.
    let (p, c) = SharedChan::new();
    bh.iter(|| {
        // The spawned proc takes its own clone so the original `c` stays
        // usable for the next iteration.
        let c = c.clone();
        spawn(proc() {
            c.send(());
        });
        // Wait for the task's message so the measured iteration includes the
        // task actually running, not just the spawn call.
        p.recv();
    });
}

/// Benchmark behind the `native` column: identical in shape to `green`, but
/// the task is created with `native::task::spawn`.
#[bench]
fn native(bh: &mut extra::test::BenchHarness) {
    // One port/channel pair shared by every iteration; the channel half is
    // cloned per task below.
    let (p, c) = SharedChan::new();
    bh.iter(|| {
        // The spawned proc takes its own clone so the original `c` stays
        // usable for the next iteration.
        let c = c.clone();
        native::task::spawn(proc() {
            c.send(());
        });
        // Wait for the task's message so the measured iteration includes the
        // task actually running, not just the spawn call.
        p.recv();
    });
}

/// Benchmark behind the `raw` column: each iteration starts a
/// `std::rt::thread::Thread` running an empty proc and joins it. No channel
/// or task spawn is involved.
#[bench]
fn raw(bh: &mut extra::test::BenchHarness) {
    bh.iter(|| {
        // Start-and-join with an empty body, so the iteration measures only
        // the thread start/join round trip.
        Thread::start(proc() {}).join()
    });
}
```
-rw-r--r--mk/crates.mk2
-rw-r--r--mk/rt.mk5
-rw-r--r--src/libgreen/context.rs113
-rw-r--r--src/libgreen/coroutine.rs18
-rw-r--r--src/libgreen/sched.rs2
-rw-r--r--src/libgreen/stack.rs34
-rw-r--r--src/libgreen/task.rs113
-rw-r--r--src/libnative/task.rs7
-rw-r--r--src/libstd/rt/local_heap.rs15
-rw-r--r--src/libstd/rt/task.rs16
-rw-r--r--src/libstd/unstable/raw.rs6
-rw-r--r--src/rt/arch/arm/_context.S8
-rw-r--r--src/rt/arch/x86_64/_context.S33
13 files changed, 231 insertions, 141 deletions
diff --git a/mk/crates.mk b/mk/crates.mk
index d7365a827b7..80231ad2ba4 100644
--- a/mk/crates.mk
+++ b/mk/crates.mk
@@ -57,7 +57,7 @@ TOOLS := compiletest rustdoc rustc
 
 DEPS_std := native:rustrt native:compiler-rt
 DEPS_extra := std term sync serialize getopts collections
-DEPS_green := std
+DEPS_green := std native:context_switch
 DEPS_rustuv := std native:uv native:uv_support
 DEPS_native := std
 DEPS_syntax := std extra term serialize collections
diff --git a/mk/rt.mk b/mk/rt.mk
index ebb1f83398e..10b73c6b395 100644
--- a/mk/rt.mk
+++ b/mk/rt.mk
@@ -35,7 +35,7 @@
 # that's per-target so you're allowed to conditionally add files based on the
 # target.
 ################################################################################
-NATIVE_LIBS := rustrt sundown uv_support morestack miniz
+NATIVE_LIBS := rustrt sundown uv_support morestack miniz context_switch
 
 # $(1) is the target triple
 define NATIVE_LIBRARIES
@@ -54,9 +54,10 @@ NATIVE_DEPS_rustrt_$(1) := rust_builtin.c \
 			rust_android_dummy.c \
 			rust_test_helpers.c \
 			rust_try.ll \
-			arch/$$(HOST_$(1))/_context.S \
 			arch/$$(HOST_$(1))/record_sp.S
 NATIVE_DEPS_morestack_$(1) := arch/$$(HOST_$(1))/morestack.S
+NATIVE_DEPS_context_switch_$(1) := \
+			arch/$$(HOST_$(1))/_context.S
 
 ################################################################################
 # You shouldn't find it that necessary to edit anything below this line.
diff --git a/src/libgreen/context.rs b/src/libgreen/context.rs
index 4e626b3bec7..58188ede13c 100644
--- a/src/libgreen/context.rs
+++ b/src/libgreen/context.rs
@@ -8,12 +8,12 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use std::libc::c_void;
 use std::uint;
 use std::cast::{transmute, transmute_mut_unsafe,
                 transmute_region, transmute_mut_region};
 use stack::Stack;
 use std::unstable::stack;
+use std::unstable::raw;
 
 // FIXME #7761: Registers is boxed so that it is 16-byte aligned, for storing
 // SSE regs.  It would be marginally better not to do this. In C++ we
@@ -22,47 +22,33 @@ use std::unstable::stack;
 // the registers are sometimes empty, but the discriminant would
 // then misalign the regs again.
 pub struct Context {
-    /// The context entry point, saved here for later destruction
-    priv start: Option<~proc()>,
     /// Hold the registers while the task or scheduler is suspended
     priv regs: ~Registers,
     /// Lower bound and upper bound for the stack
     priv stack_bounds: Option<(uint, uint)>,
 }
 
+pub type InitFn = extern "C" fn(uint, *(), *()) -> !;
+
 impl Context {
     pub fn empty() -> Context {
         Context {
-            start: None,
             regs: new_regs(),
             stack_bounds: None,
         }
     }
 
     /// Create a new context that will resume execution by running proc()
-    pub fn new(start: proc(), stack: &mut Stack) -> Context {
-        // The C-ABI function that is the task entry point
-        //
-        // Note that this function is a little sketchy. We're taking a
-        // procedure, transmuting it to a stack-closure, and then calling to
-        // closure. This leverages the fact that the representation of these two
-        // types is the same.
-        //
-        // The reason that we're doing this is that this procedure is expected
-        // to never return. The codegen which frees the environment of the
-        // procedure occurs *after* the procedure has completed, and this means
-        // that we'll never actually free the procedure.
-        //
-        // To solve this, we use this transmute (to not trigger the procedure
-        // deallocation here), and then store a copy of the procedure in the
-        // `Context` structure returned. When the `Context` is deallocated, then
-        // the entire procedure box will be deallocated as well.
-        extern fn task_start_wrapper(f: &proc()) {
-            unsafe {
-                let f: &|| = transmute(f);
-                (*f)()
-            }
-        }
+    ///
+    /// The `init` function will be run with `arg` and the `start` procedure
+    /// split up into code and env pointers. It is required that the `init`
+    /// function never return.
+    ///
+    /// FIXME: this is basically an awful interface. The main reason for
+    ///        this is to reduce the number of allocations made when a green
+    ///        task is spawned as much as possible
+    pub fn new(init: InitFn, arg: uint, start: proc(),
+               stack: &mut Stack) -> Context {
 
         let sp: *uint = stack.end();
         let sp: *mut uint = unsafe { transmute_mut_unsafe(sp) };
@@ -74,14 +60,10 @@ impl Context {
                                 transmute_region(&*regs));
         };
 
-        // FIXME #7767: Putting main into a ~ so it's a thin pointer and can
-        // be passed to the spawn function.  Another unfortunate
-        // allocation
-        let start = ~start;
-
         initialize_call_frame(&mut *regs,
-                              task_start_wrapper as *c_void,
-                              unsafe { transmute(&*start) },
+                              init,
+                              arg,
+                              unsafe { transmute(start) },
                               sp);
 
         // Scheduler tasks don't have a stack in the "we allocated it" sense,
@@ -96,7 +78,6 @@ impl Context {
             Some((stack_base as uint, sp as uint))
         };
         return Context {
-            start: Some(start),
             regs: regs,
             stack_bounds: bounds,
         }
@@ -138,7 +119,7 @@ impl Context {
     }
 }
 
-#[link(name = "rustrt", kind = "static")]
+#[link(name = "context_switch", kind = "static")]
 extern {
     fn rust_swap_registers(out_regs: *mut Registers, in_regs: *Registers);
 }
@@ -185,13 +166,17 @@ fn new_regs() -> ~Registers {
 }
 
 #[cfg(target_arch = "x86")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
 
+    // x86 has interesting stack alignment requirements, so do some alignment
+    // plus some offsetting to figure out what the actual stack should be.
     let sp = align_down(sp);
     let sp = mut_offset(sp, -4);
 
-    unsafe { *sp = arg as uint };
+    unsafe { *mut_offset(sp, 2) = procedure.env as uint };
+    unsafe { *mut_offset(sp, 1) = procedure.code as uint };
+    unsafe { *mut_offset(sp, 0) = arg as uint };
     let sp = mut_offset(sp, -1);
     unsafe { *sp = 0 }; // The final return address
 
@@ -215,14 +200,18 @@ fn new_regs() -> ~Registers { ~([0, .. 34]) }
 fn new_regs() -> ~Registers { ~([0, .. 22]) }
 
 #[cfg(target_arch = "x86_64")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
+    extern { fn rust_bootstrap_green_task(); }
 
     // Redefinitions from rt/arch/x86_64/regs.h
-    static RUSTRT_ARG0: uint = 3;
     static RUSTRT_RSP: uint = 1;
     static RUSTRT_IP: uint = 8;
     static RUSTRT_RBP: uint = 2;
+    static RUSTRT_R12: uint = 4;
+    static RUSTRT_R13: uint = 5;
+    static RUSTRT_R14: uint = 6;
+    static RUSTRT_R15: uint = 7;
 
     let sp = align_down(sp);
     let sp = mut_offset(sp, -1);
@@ -231,13 +220,23 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
     unsafe { *sp = 0; }
 
     rtdebug!("creating call frame");
-    rtdebug!("fptr {}", fptr);
-    rtdebug!("arg {}", arg);
+    rtdebug!("fptr {:#x}", fptr as uint);
+    rtdebug!("arg {:#x}", arg);
     rtdebug!("sp {}", sp);
 
-    regs[RUSTRT_ARG0] = arg as uint;
+    // These registers are frobbed by rust_bootstrap_green_task into the right
+    // location so we can invoke the "real init function", `fptr`.
+    regs[RUSTRT_R12] = arg as uint;
+    regs[RUSTRT_R13] = procedure.code as uint;
+    regs[RUSTRT_R14] = procedure.env as uint;
+    regs[RUSTRT_R15] = fptr as uint;
+
+    // These registers are picked up by the regular context switch paths. These
+    // will put us in "mostly the right context" except for frobbing all the
+    // arguments to the right place. We have the small trampoline code inside of
+    // rust_bootstrap_green_task to do that.
     regs[RUSTRT_RSP] = sp as uint;
-    regs[RUSTRT_IP] = fptr as uint;
+    regs[RUSTRT_IP] = rust_bootstrap_green_task as uint;
 
     // Last base pointer on the stack should be 0
     regs[RUSTRT_RBP] = 0;
@@ -250,8 +249,10 @@ type Registers = [uint, ..32];
 fn new_regs() -> ~Registers { ~([0, .. 32]) }
 
 #[cfg(target_arch = "arm")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
+    extern { fn rust_bootstrap_green_task(); }
+
     let sp = align_down(sp);
     // sp of arm eabi is 8-byte aligned
     let sp = mut_offset(sp, -2);
@@ -259,9 +260,15 @@ fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
     // The final return address. 0 indicates the bottom of the stack
     unsafe { *sp = 0; }
 
-    regs[0] = arg as uint;   // r0
-    regs[13] = sp as uint;   // #53 sp, r13
-    regs[14] = fptr as uint; // #60 pc, r15 --> lr
+    // ARM uses the same technique as x86_64 to have a landing pad for the start
+    // of all new green tasks. Neither r1/r2 are saved on a context switch, so
+    // the shim will copy r3/r4 into r1/r2 and then execute the function in r5
+    regs[0] = arg as uint;              // r0
+    regs[3] = procedure.code as uint;   // r3
+    regs[4] = procedure.env as uint;    // r4
+    regs[5] = fptr as uint;             // r5
+    regs[13] = sp as uint;                          // #52 sp, r13
+    regs[14] = rust_bootstrap_green_task as uint;   // #56 pc, r14 --> lr
 }
 
 #[cfg(target_arch = "mips")]
@@ -271,8 +278,8 @@ type Registers = [uint, ..32];
 fn new_regs() -> ~Registers { ~([0, .. 32]) }
 
 #[cfg(target_arch = "mips")]
-fn initialize_call_frame(regs: &mut Registers, fptr: *c_void, arg: *c_void,
-                         sp: *mut uint) {
+fn initialize_call_frame(regs: &mut Registers, fptr: InitFn, arg: uint,
+                         procedure: raw::Procedure, sp: *mut uint) {
     let sp = align_down(sp);
     // sp of mips o32 is 8-byte aligned
     let sp = mut_offset(sp, -2);
diff --git a/src/libgreen/coroutine.rs b/src/libgreen/coroutine.rs
index c001d40a246..b20892886c6 100644
--- a/src/libgreen/coroutine.rs
+++ b/src/libgreen/coroutine.rs
@@ -11,8 +11,6 @@
 // Coroutines represent nothing more than a context and a stack
 // segment.
 
-use std::rt::env;
-
 use context::Context;
 use stack::{StackPool, Stack};
 
@@ -31,22 +29,6 @@ pub struct Coroutine {
 }
 
 impl Coroutine {
-    pub fn new(stack_pool: &mut StackPool,
-               stack_size: Option<uint>,
-               start: proc())
-               -> Coroutine {
-        let stack_size = match stack_size {
-            Some(size) => size,
-            None => env::min_stack()
-        };
-        let mut stack = stack_pool.take_stack(stack_size);
-        let initial_context = Context::new(start, &mut stack);
-        Coroutine {
-            current_stack_segment: stack,
-            saved_context: initial_context
-        }
-    }
-
     pub fn empty() -> Coroutine {
         Coroutine {
             current_stack_segment: unsafe { Stack::dummy_stack() },
diff --git a/src/libgreen/sched.rs b/src/libgreen/sched.rs
index bf6e0c3430e..b224b0cabf3 100644
--- a/src/libgreen/sched.rs
+++ b/src/libgreen/sched.rs
@@ -756,7 +756,7 @@ impl Scheduler {
 
     /// Called by a running task to end execution, after which it will
     /// be recycled by the scheduler for reuse in a new task.
-    pub fn terminate_current_task(mut ~self, cur: ~GreenTask) {
+    pub fn terminate_current_task(mut ~self, cur: ~GreenTask) -> ! {
         // Similar to deschedule running task and then, but cannot go through
         // the task-blocking path. The task is already dying.
         let stask = self.sched_task.take_unwrap();
diff --git a/src/libgreen/stack.rs b/src/libgreen/stack.rs
index 4b3db5ef8ed..8a5e6be17c8 100644
--- a/src/libgreen/stack.rs
+++ b/src/libgreen/stack.rs
@@ -138,9 +138,9 @@ impl StackPool {
 
     pub fn take_stack(&mut self, min_size: uint) -> Stack {
         // Ideally this would be a binary search
-        match self.stacks.iter().position(|s| s.min_size < min_size) {
+        match self.stacks.iter().position(|s| min_size <= s.min_size) {
             Some(idx) => self.stacks.swap_remove(idx),
-            None      => Stack::new(min_size)
+            None => Stack::new(min_size)
         }
     }
 
@@ -156,3 +156,33 @@ extern {
                                     end: *libc::uintptr_t) -> libc::c_uint;
     fn rust_valgrind_stack_deregister(id: libc::c_uint);
 }
+
+#[cfg(test)]
+mod tests {
+    use super::StackPool;
+
+    #[test]
+    fn stack_pool_caches() {
+        let mut p = StackPool::new();
+        let s = p.take_stack(10);
+        p.give_stack(s);
+        let s = p.take_stack(4);
+        assert_eq!(s.min_size, 10);
+        p.give_stack(s);
+        let s = p.take_stack(14);
+        assert_eq!(s.min_size, 14);
+        p.give_stack(s);
+    }
+
+    #[test]
+    fn stack_pool_caches_exact() {
+        let mut p = StackPool::new();
+        let mut s = p.take_stack(10);
+        s.valgrind_id = 100;
+        p.give_stack(s);
+
+        let s = p.take_stack(10);
+        assert_eq!(s.min_size, 10);
+        assert_eq!(s.valgrind_id, 100);
+    }
+}
diff --git a/src/libgreen/task.rs b/src/libgreen/task.rs
index e492acb4468..2aca72e35f1 100644
--- a/src/libgreen/task.rs
+++ b/src/libgreen/task.rs
@@ -19,13 +19,16 @@
 //! values.
 
 use std::cast;
+use std::rt::env;
 use std::rt::Runtime;
-use std::rt::rtio;
 use std::rt::local::Local;
-use std::rt::task::{Task, BlockedTask};
+use std::rt::rtio;
+use std::rt::task::{Task, BlockedTask, SendMessage};
 use std::task::TaskOpts;
 use std::unstable::mutex::Mutex;
+use std::unstable::raw;
 
+use context::Context;
 use coroutine::Coroutine;
 use sched::{Scheduler, SchedHandle, RunOnce};
 use stack::StackPool;
@@ -75,6 +78,50 @@ pub enum Home {
     HomeSched(SchedHandle),
 }
 
+/// Trampoline code for all new green tasks which are running around. This
+/// function is passed through to Context::new as the initial rust landing pad
+/// for all green tasks. This code is actually called after the initial context
+/// switch onto a green thread.
+///
+/// The first argument to this function is the `~GreenTask` pointer, and the
+/// next two arguments are the user-provided procedure for running code.
+///
+/// The goal for having this weird-looking function is to reduce the number of
+/// allocations done on a green-task startup as much as possible.
+extern fn bootstrap_green_task(task: uint, code: *(), env: *()) -> ! {
+    // Acquire ownership of the `proc()`
+    let start: proc() = unsafe {
+        cast::transmute(raw::Procedure { code: code, env: env })
+    };
+
+    // Acquire ownership of the `~GreenTask`
+    let mut task: ~GreenTask = unsafe { cast::transmute(task) };
+
+    // First code after swap to this new context. Run our cleanup job
+    task.pool_id = {
+        let sched = task.sched.get_mut_ref();
+        sched.run_cleanup_job();
+        sched.task_state.increment();
+        sched.pool_id
+    };
+
+    // Convert our green task to a libstd task and then execute the code
+    // requested. This is the "try/catch" block for this green task and
+    // is the wrapper for *all* code run in the task.
+    let mut start = Some(start);
+    let task = task.swap().run(|| start.take_unwrap()());
+
+    // Once the function has exited, it's time to run the termination
+    // routine. This means we need to context switch one more time but
+    // clean ourselves up on the other end. Since we have no way of
+    // preserving a handle to the GreenTask down to this point, this
+    // unfortunately must call `GreenTask::convert`. In order to avoid
+    // this we could add a `terminate` function to the `Runtime` trait
+    // in libstd, but that seems less appropriate since the conversion
+    // method exists.
+    GreenTask::convert(task).terminate()
+}
+
 impl GreenTask {
     /// Creates a new green task which is not homed to any particular scheduler
     /// and will not have any contained Task structure.
@@ -89,9 +136,20 @@ impl GreenTask {
                      stack_size: Option<uint>,
                      home: Home,
                      start: proc()) -> ~GreenTask {
+        // Allocate ourselves a GreenTask structure
         let mut ops = GreenTask::new_typed(None, TypeGreen(Some(home)));
-        let start = GreenTask::build_start_wrapper(start, ops.as_uint());
-        ops.coroutine = Some(Coroutine::new(stack_pool, stack_size, start));
+
+        // Allocate a stack for us to run on
+        let stack_size = stack_size.unwrap_or_else(|| env::min_stack());
+        let mut stack = stack_pool.take_stack(stack_size);
+        let context = Context::new(bootstrap_green_task, ops.as_uint(), start,
+                                   &mut stack);
+
+        // Package everything up in a coroutine and return
+        ops.coroutine = Some(Coroutine {
+            current_stack_segment: stack,
+            saved_context: context,
+        });
         return ops;
     }
 
@@ -131,8 +189,7 @@ impl GreenTask {
             task.stdout = stdout;
             match notify_chan {
                 Some(chan) => {
-                    let on_exit = proc(task_result) { chan.send(task_result) };
-                    task.death.on_exit = Some(on_exit);
+                    task.death.on_exit = Some(SendMessage(chan));
                 }
                 None => {}
             }
@@ -157,46 +214,6 @@ impl GreenTask {
         }
     }
 
-    /// Builds a function which is the actual starting execution point for a
-    /// rust task. This function is the glue necessary to execute the libstd
-    /// task and then clean up the green thread after it exits.
-    ///
-    /// The second argument to this function is actually a transmuted copy of
-    /// the `GreenTask` pointer. Context switches in the scheduler silently
-    /// transfer ownership of the `GreenTask` to the other end of the context
-    /// switch, so because this is the first code that is running in this task,
-    /// it must first re-acquire ownership of the green task.
-    pub fn build_start_wrapper(start: proc(), ops: uint) -> proc() {
-        proc() {
-            // First code after swap to this new context. Run our
-            // cleanup job after we have re-acquired ownership of the green
-            // task.
-            let mut task: ~GreenTask = unsafe { GreenTask::from_uint(ops) };
-            task.pool_id = {
-                let sched = task.sched.get_mut_ref();
-                sched.run_cleanup_job();
-                sched.task_state.increment();
-                sched.pool_id
-            };
-
-            // Convert our green task to a libstd task and then execute the code
-            // requested. This is the "try/catch" block for this green task and
-            // is the wrapper for *all* code run in the task.
-            let mut start = Some(start);
-            let task = task.swap().run(|| start.take_unwrap()());
-
-            // Once the function has exited, it's time to run the termination
-            // routine. This means we need to context switch one more time but
-            // clean ourselves up on the other end. Since we have no way of
-            // preserving a handle to the GreenTask down to this point, this
-            // unfortunately must call `GreenTask::convert`. In order to avoid
-            // this we could add a `terminate` function to the `Runtime` trait
-            // in libstd, but that seems less appropriate since the coversion
-            // method exists.
-            GreenTask::convert(task).terminate();
-        }
-    }
-
     pub fn give_home(&mut self, new_home: Home) {
         match self.task_type {
             TypeGreen(ref mut home) => { *home = Some(new_home); }
@@ -279,9 +296,9 @@ impl GreenTask {
         Local::put(self.swap());
     }
 
-    fn terminate(mut ~self) {
+    fn terminate(mut ~self) -> ! {
         let sched = self.sched.take_unwrap();
-        sched.terminate_current_task(self);
+        sched.terminate_current_task(self)
     }
 
     // This function is used to remotely wakeup this green task back on to its
diff --git a/src/libnative/task.rs b/src/libnative/task.rs
index d0ca8364aa7..a9c3afbbb16 100644
--- a/src/libnative/task.rs
+++ b/src/libnative/task.rs
@@ -18,7 +18,7 @@ use std::cast;
 use std::rt::env;
 use std::rt::local::Local;
 use std::rt::rtio;
-use std::rt::task::{Task, BlockedTask};
+use std::rt::task::{Task, BlockedTask, SendMessage};
 use std::rt::thread::Thread;
 use std::rt;
 use std::task::TaskOpts;
@@ -68,10 +68,7 @@ pub fn spawn_opts(opts: TaskOpts, f: proc()) {
     task.stderr = stderr;
     task.stdout = stdout;
     match notify_chan {
-        Some(chan) => {
-            let on_exit = proc(task_result) { chan.send(task_result) };
-            task.death.on_exit = Some(on_exit);
-        }
+        Some(chan) => { task.death.on_exit = Some(SendMessage(chan)); }
         None => {}
     }
 
diff --git a/src/libstd/rt/local_heap.rs b/src/libstd/rt/local_heap.rs
index 023f712d3a0..7acce2ecb5b 100644
--- a/src/libstd/rt/local_heap.rs
+++ b/src/libstd/rt/local_heap.rs
@@ -23,6 +23,7 @@ use rt::local::Local;
 use rt::task::Task;
 use unstable::raw;
 use vec::ImmutableVector;
+use vec_ng::Vec;
 
 // This has no meaning with out rtdebug also turned on.
 #[cfg(rtdebug)]
@@ -33,7 +34,7 @@ static MAGIC: u32 = 0xbadc0ffe;
 pub type Box = raw::Box<()>;
 
 pub struct MemoryRegion {
-    priv allocations: ~[*AllocHeader],
+    priv allocations: Vec<*AllocHeader>,
     priv live_allocations: uint,
 }
 
@@ -48,7 +49,7 @@ impl LocalHeap {
     #[inline]
     pub fn new() -> LocalHeap {
         let region = MemoryRegion {
-            allocations: ~[],
+            allocations: Vec::new(),
             live_allocations: 0,
         };
         LocalHeap {
@@ -248,8 +249,8 @@ impl MemoryRegion {
     fn release(&mut self, alloc: &AllocHeader) {
         alloc.assert_sane();
         if TRACK_ALLOCATIONS > 1 {
-            rtassert!(self.allocations[alloc.index] == alloc as *AllocHeader);
-            self.allocations[alloc.index] = ptr::null();
+            rtassert!(self.allocations.as_slice()[alloc.index] == alloc as *AllocHeader);
+            self.allocations.as_mut_slice()[alloc.index] = ptr::null();
         }
     }
     #[cfg(not(rtdebug))]
@@ -260,8 +261,8 @@ impl MemoryRegion {
     fn update(&mut self, alloc: &mut AllocHeader, orig: *AllocHeader) {
         alloc.assert_sane();
         if TRACK_ALLOCATIONS > 1 {
-            rtassert!(self.allocations[alloc.index] == orig);
-            self.allocations[alloc.index] = &*alloc as *AllocHeader;
+            rtassert!(self.allocations.as_slice()[alloc.index] == orig);
+            self.allocations.as_mut_slice()[alloc.index] = &*alloc as *AllocHeader;
         }
     }
     #[cfg(not(rtdebug))]
@@ -274,7 +275,7 @@ impl Drop for MemoryRegion {
         if self.live_allocations != 0 {
             rtabort!("leaked managed memory ({} objects)", self.live_allocations);
         }
-        rtassert!(self.allocations.iter().all(|s| s.is_null()));
+        rtassert!(self.allocations.as_slice().iter().all(|s| s.is_null()));
     }
 }
 
diff --git a/src/libstd/rt/task.rs b/src/libstd/rt/task.rs
index e2b94e655e8..0719523af77 100644
--- a/src/libstd/rt/task.rs
+++ b/src/libstd/rt/task.rs
@@ -17,6 +17,7 @@ use any::AnyOwnExt;
 use cast;
 use cleanup;
 use clone::Clone;
+use comm::Chan;
 use io::Writer;
 use iter::{Iterator, Take};
 use local_data;
@@ -67,11 +68,17 @@ pub enum BlockedTask {
     Shared(UnsafeArc<AtomicUint>),
 }
 
+pub enum DeathAction {
+    /// Action to be done with the exit code. If set, also makes the task wait
+    /// until all its watched children exit before collecting the status.
+    Execute(proc(TaskResult)),
+    /// A channel to send the result of the task on when the task exits
+    SendMessage(Chan<TaskResult>),
+}
+
 /// Per-task state related to task death, killing, failure, etc.
 pub struct Death {
-    // Action to be done with the exit code. If set, also makes the task wait
-    // until all its watched children exit before collecting the status.
-    on_exit: Option<proc(TaskResult)>,
+    on_exit: Option<DeathAction>,
 }
 
 pub struct BlockedTasks {
@@ -381,7 +388,8 @@ impl Death {
     /// Collect failure exit codes from children and propagate them to a parent.
     pub fn collect_failure(&mut self, result: TaskResult) {
         match self.on_exit.take() {
-            Some(f) => f(result),
+            Some(Execute(f)) => f(result),
+            Some(SendMessage(ch)) => { ch.try_send(result); }
             None => {}
         }
     }
diff --git a/src/libstd/unstable/raw.rs b/src/libstd/unstable/raw.rs
index 87547997798..c25422d24e9 100644
--- a/src/libstd/unstable/raw.rs
+++ b/src/libstd/unstable/raw.rs
@@ -41,6 +41,12 @@ pub struct Closure {
     env: *(),
 }
 
+/// The representation of a Rust procedure (`proc()`)
+pub struct Procedure {
+    code: *(),
+    env: *(),
+}
+
 /// This trait is meant to map equivalences between raw structs and their
 /// corresponding rust values.
 pub trait Repr<T> {
diff --git a/src/rt/arch/arm/_context.S b/src/rt/arch/arm/_context.S
index 4ab463d968e..fb6db57414a 100644
--- a/src/rt/arch/arm/_context.S
+++ b/src/rt/arch/arm/_context.S
@@ -51,3 +51,11 @@ rust_swap_registers:
 	msr cpsr_cxsf, r2
 
 	mov pc, lr
+
+// For the reasons why this exists, see the comments in x86_64/_context.S
+.globl rust_bootstrap_green_task
+rust_bootstrap_green_task:
+        mov r0, r0
+        mov r1, r3
+        mov r2, r4
+        mov pc, r5
diff --git a/src/rt/arch/x86_64/_context.S b/src/rt/arch/x86_64/_context.S
index 74f20650f30..36caf7720c4 100644
--- a/src/rt/arch/x86_64/_context.S
+++ b/src/rt/arch/x86_64/_context.S
@@ -157,3 +157,36 @@ SWAP_REGISTERS:
         // Jump to the instruction pointer
         // found in regs:
         jmp *(RUSTRT_IP*8)(ARG1)
+
+// This function below, rust_bootstrap_green_task, is used to initialize a green
+// task. This code is the very first code that is run whenever a green task
+// starts. The only assumptions that this code makes are that it has a register
+// context previously set up by Context::new() and some values are in some
+// special registers.
+//
+// In theory the register context could be set up and then the context switching
+// would plop us directly into some 'extern "C" fn', but not all platforms have
+// the argument registers saved throughout a context switch (linux doesn't save
+// rdi/rsi, the first two argument registers). Instead of modifying all context
+// switches, the initial data for starting a green thread is shoved into
+// unrelated registers (r12/13, etc) which always need to be saved on context
+// switches anyway.
+//
+// With this strategy we get the benefit of being able to pass a fair bit of
+// contextual data from the start of a green task to its init function, as well
+// as not hindering any context switches.
+//
+// If you alter this code in any way, you likely need to update
+// src/libgreen/context.rs as well.
+
+#if defined(__APPLE__)
+#define BOOTSTRAP _rust_bootstrap_green_task
+#else
+#define BOOTSTRAP rust_bootstrap_green_task
+#endif
+.globl BOOTSTRAP
+BOOTSTRAP:
+    mov %r12, RUSTRT_ARG0_S
+    mov %r13, RUSTRT_ARG1_S
+    mov %r14, RUSTRT_ARG2_S
+    jmpq *%r15