about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBen Blum <bblum@andrew.cmu.edu>2012-07-20 18:06:17 -0400
committerBen Blum <bblum@andrew.cmu.edu>2012-07-20 19:23:19 -0400
commit5bb4a12900969b3250c490a5e0682c658fe65ba8 (patch)
tree9fee7318849691fd35658af7363dfdd8d433b0c9
parentf55999fd7a03b4f876e69e94e491d3c30bf0c076 (diff)
downloadrust-5bb4a12900969b3250c490a5e0682c658fe65ba8.tar.gz
rust-5bb4a12900969b3250c490a5e0682c658fe65ba8.zip
[1/4 for #2365, #2671] Fix create/kill race with schedulers and tasks during rust_kernel::fail
-rw-r--r--src/rt/rust_kernel.cpp12
-rw-r--r--src/rt/rust_kernel.h6
-rw-r--r--src/rt/rust_sched_launcher.cpp23
-rw-r--r--src/rt/rust_sched_launcher.h14
-rw-r--r--src/rt/rust_sched_loop.cpp10
-rw-r--r--src/rt/rust_sched_loop.h4
-rw-r--r--src/rt/rust_scheduler.cpp12
-rw-r--r--src/rt/rust_scheduler.h8
-rw-r--r--src/rt/rust_task.cpp11
-rw-r--r--src/rt/rust_task.h1
10 files changed, 69 insertions, 32 deletions
diff --git a/src/rt/rust_kernel.cpp b/src/rt/rust_kernel.cpp
index cbe0bb21199..7b1adf3ef4c 100644
--- a/src/rt/rust_kernel.cpp
+++ b/src/rt/rust_kernel.cpp
@@ -19,6 +19,7 @@ rust_kernel::rust_kernel(rust_env *env) :
     max_port_id(1),
     rval(0),
     max_sched_id(1),
+    killed(false),
     sched_reaper(this),
     osmain_driver(NULL),
     non_weak_tasks(0),
@@ -103,7 +104,8 @@ rust_kernel::create_scheduler(rust_sched_launcher_factory *launchfac,
         id = max_sched_id++;
         assert(id != INTPTR_MAX && "Hit the maximum scheduler id");
         sched = new (this, "rust_scheduler")
-            rust_scheduler(this, num_threads, id, allow_exit, launchfac);
+            rust_scheduler(this, num_threads, id, allow_exit, killed,
+                           launchfac);
         bool is_new = sched_table
             .insert(std::pair<rust_sched_id,
                               rust_scheduler*>(id, sched)).second;
@@ -197,6 +199,10 @@ rust_kernel::fail() {
 #endif
     // Copy the list of schedulers so that we don't hold the lock while
     // running kill_all_tasks.
+    // I think this only needs to be done by one task ever; as it is,
+    // multiple tasks invoking kill_all might get here. Currently libcore
+    // ensures only one task will ever invoke it, but this would really be
+    // fine either way, so I'm leaving it as it is. -- bblum
     // FIXME (#2671): There's a lot that happens under kill_all_tasks,
     // and I don't know that holding sched_lock here is ok, but we need
     // to hold the sched lock to prevent the scheduler from being
@@ -205,15 +211,13 @@ rust_kernel::fail() {
     std::vector<rust_scheduler*> scheds;
     {
         scoped_lock with(sched_lock);
+        killed = true;
         for (sched_map::iterator iter = sched_table.begin();
              iter != sched_table.end(); iter++) {
             scheds.push_back(iter->second);
         }
     }
 
-    // FIXME (#2671): This is not a foolproof way to kill all tasks
-    // while ensuring that no new tasks or schedulers are created in the
-    // meantime that keep the scheduler alive.
     for (std::vector<rust_scheduler*>::iterator iter = scheds.begin();
          iter != scheds.end(); iter++) {
         (*iter)->kill_all_tasks();
diff --git a/src/rt/rust_kernel.h b/src/rt/rust_kernel.h
index ba2c6a7bff0..00c78dea2f9 100644
--- a/src/rt/rust_kernel.h
+++ b/src/rt/rust_kernel.h
@@ -72,7 +72,7 @@ class rust_kernel {
     lock_and_signal rval_lock;
     int rval;
 
-    // Protects max_sched_id and sched_table, join_list
+    // Protects max_sched_id and sched_table, join_list, killed
     lock_and_signal sched_lock;
     // The next scheduler id
     rust_sched_id max_sched_id;
@@ -81,6 +81,10 @@ class rust_kernel {
     sched_map sched_table;
     // A list of scheduler ids that are ready to exit
     std::vector<rust_sched_id> join_list;
+    // Whether or not the runtime has to die (triggered when the root/main
+    // task group fails). This propagates to all new schedulers and tasks
+    // created after it is set.
+    bool killed;
 
     rust_sched_reaper sched_reaper;
     // The single-threaded scheduler that uses the main thread
diff --git a/src/rt/rust_sched_launcher.cpp b/src/rt/rust_sched_launcher.cpp
index bf0481b02f2..9dfa06800b6 100644
--- a/src/rt/rust_sched_launcher.cpp
+++ b/src/rt/rust_sched_launcher.cpp
@@ -4,33 +4,36 @@
 
 const size_t SCHED_STACK_SIZE = 1024*100;
 
-rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id)
+rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id,
+                                         bool killed)
     : kernel(sched->kernel),
-      sched_loop(sched, id),
+      sched_loop(sched, id, killed),
       driver(&sched_loop) {
 }
 
 rust_thread_sched_launcher::rust_thread_sched_launcher(rust_scheduler *sched,
-                                                       int id)
-    : rust_sched_launcher(sched, id),
+                                                       int id, bool killed)
+    : rust_sched_launcher(sched, id, killed),
       rust_thread(SCHED_STACK_SIZE) {
 }
 
 rust_manual_sched_launcher::rust_manual_sched_launcher(rust_scheduler *sched,
-                                                       int id)
-    : rust_sched_launcher(sched, id) {
+                                                       int id, bool killed)
+    : rust_sched_launcher(sched, id, killed) {
 }
 
 rust_sched_launcher *
-rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id) {
+rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id,
+                                           bool killed) {
     return new(sched->kernel, "rust_thread_sched_launcher")
-        rust_thread_sched_launcher(sched, id);
+        rust_thread_sched_launcher(sched, id, killed);
 }
 
 rust_sched_launcher *
-rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id) {
+rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id,
+                                           bool killed) {
     assert(launcher == NULL && "I can only track one sched_launcher");
     launcher = new(sched->kernel, "rust_manual_sched_launcher")
-        rust_manual_sched_launcher(sched, id);
+        rust_manual_sched_launcher(sched, id, killed);
     return launcher;
 }
diff --git a/src/rt/rust_sched_launcher.h b/src/rt/rust_sched_launcher.h
index ace2cba2530..72c9fd2dd36 100644
--- a/src/rt/rust_sched_launcher.h
+++ b/src/rt/rust_sched_launcher.h
@@ -17,7 +17,7 @@ protected:
     rust_sched_driver driver;
 
 public:
-    rust_sched_launcher(rust_scheduler *sched, int id);
+    rust_sched_launcher(rust_scheduler *sched, int id, bool killed);
     virtual ~rust_sched_launcher() { }
 
     virtual void start() = 0;
@@ -29,7 +29,7 @@ class rust_thread_sched_launcher
   :public rust_sched_launcher,
    private rust_thread {
 public:
-    rust_thread_sched_launcher(rust_scheduler *sched, int id);
+    rust_thread_sched_launcher(rust_scheduler *sched, int id, bool killed);
     virtual void start() { rust_thread::start(); }
     virtual void join() { rust_thread::join(); }
     virtual void run() { driver.start_main_loop(); }
@@ -37,7 +37,7 @@ public:
 
 class rust_manual_sched_launcher : public rust_sched_launcher {
 public:
-    rust_manual_sched_launcher(rust_scheduler *sched, int id);
+    rust_manual_sched_launcher(rust_scheduler *sched, int id, bool killed);
     virtual void start() { }
     virtual void join() { }
     rust_sched_driver *get_driver() { return &driver; };
@@ -47,13 +47,14 @@ class rust_sched_launcher_factory {
 public:
     virtual ~rust_sched_launcher_factory() { }
     virtual rust_sched_launcher *
-    create(rust_scheduler *sched, int id) = 0;
+    create(rust_scheduler *sched, int id, bool killed) = 0;
 };
 
 class rust_thread_sched_launcher_factory
     : public rust_sched_launcher_factory {
 public:
-    virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
+    virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
+                                        bool killed);
 };
 
 class rust_manual_sched_launcher_factory
@@ -62,7 +63,8 @@ private:
     rust_manual_sched_launcher *launcher;
 public:
     rust_manual_sched_launcher_factory() : launcher(NULL) { }
-    virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
+    virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
+                                        bool killed);
     rust_sched_driver *get_driver() {
         assert(launcher != NULL);
         return launcher->get_driver();
diff --git a/src/rt/rust_sched_loop.cpp b/src/rt/rust_sched_loop.cpp
index 1fbc0c1e07c..aa2c62c630d 100644
--- a/src/rt/rust_sched_loop.cpp
+++ b/src/rt/rust_sched_loop.cpp
@@ -13,12 +13,13 @@ const size_t C_STACK_SIZE = 1024*1024;
 
 bool rust_sched_loop::tls_initialized = false;
 
-rust_sched_loop::rust_sched_loop(rust_scheduler *sched,int id) :
+rust_sched_loop::rust_sched_loop(rust_scheduler *sched, int id, bool killed) :
     _log(this),
     id(id),
     should_exit(false),
     cached_c_stack(NULL),
     dead_task(NULL),
+    killed(killed),
     pump_signal(NULL),
     kernel(sched->kernel),
     sched(sched),
@@ -63,6 +64,8 @@ rust_sched_loop::kill_all_tasks() {
 
     {
         scoped_lock with(lock);
+        // Any task created after this will be killed. See transition, below.
+        killed = true;
 
         for (size_t i = 0; i < running_tasks.length(); i++) {
             all_tasks.push_back(running_tasks[i]);
@@ -319,6 +322,11 @@ rust_sched_loop::transition(rust_task *task,
     }
     task->set_state(dst, cond, cond_name);
 
+    // If the entire runtime is failing, newborn tasks must be doomed.
+    if (src == task_state_newborn && killed) {
+        task->kill_inner();
+    }
+
     pump_loop();
 }
 
diff --git a/src/rt/rust_sched_loop.h b/src/rt/rust_sched_loop.h
index 61fd8ac05fd..eb70fe07b7e 100644
--- a/src/rt/rust_sched_loop.h
+++ b/src/rt/rust_sched_loop.h
@@ -60,6 +60,7 @@ private:
     rust_task_list running_tasks;
     rust_task_list blocked_tasks;
     rust_task *dead_task;
+    bool killed;
 
     rust_signal *pump_signal;
 
@@ -91,7 +92,7 @@ public:
 
     // Only a pointer to 'name' is kept, so it must live as long as this
     // domain.
-    rust_sched_loop(rust_scheduler *sched, int id);
+    rust_sched_loop(rust_scheduler *sched, int id, bool killed);
     void activate(rust_task *task);
     rust_log & get_log();
     void fail();
@@ -107,6 +108,7 @@ public:
     void log_state();
 
     void kill_all_tasks();
+    bool doomed();
 
     rust_task *create_task(rust_task *spawner, const char *name);
 
diff --git a/src/rt/rust_scheduler.cpp b/src/rt/rust_scheduler.cpp
index 1f0b16a1d58..dc662b009ac 100644
--- a/src/rt/rust_scheduler.cpp
+++ b/src/rt/rust_scheduler.cpp
@@ -9,6 +9,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
                                size_t num_threads,
                                rust_sched_id id,
                                bool allow_exit,
+                               bool killed,
                                rust_sched_launcher_factory *launchfac) :
     kernel(kernel),
     live_threads(num_threads),
@@ -18,7 +19,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
     num_threads(num_threads),
     id(id)
 {
-    create_task_threads(launchfac);
+    create_task_threads(launchfac, killed);
 }
 
 rust_scheduler::~rust_scheduler() {
@@ -27,8 +28,8 @@ rust_scheduler::~rust_scheduler() {
 
 rust_sched_launcher *
 rust_scheduler::create_task_thread(rust_sched_launcher_factory *launchfac,
-                                   int id) {
-    rust_sched_launcher *thread = launchfac->create(this, id);
+                                   int id, bool killed) {
+    rust_sched_launcher *thread = launchfac->create(this, id, killed);
     KLOG(kernel, kern, "created task thread: " PTR ", id: %d",
           thread, id);
     return thread;
@@ -41,11 +42,12 @@ rust_scheduler::destroy_task_thread(rust_sched_launcher *thread) {
 }
 
 void
-rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac) {
+rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac,
+                                    bool killed) {
     KLOG(kernel, kern, "Using %d scheduler threads.", num_threads);
 
     for(size_t i = 0; i < num_threads; ++i) {
-        threads.push(create_task_thread(launchfac, i));
+        threads.push(create_task_thread(launchfac, i, killed));
     }
 }
 
diff --git a/src/rt/rust_scheduler.h b/src/rt/rust_scheduler.h
index 74e6d6bf2bd..699354f6d78 100644
--- a/src/rt/rust_scheduler.h
+++ b/src/rt/rust_scheduler.h
@@ -34,18 +34,20 @@ private:
 
     rust_sched_id id;
 
-    void create_task_threads(rust_sched_launcher_factory *launchfac);
+    void create_task_threads(rust_sched_launcher_factory *launchfac,
+                             bool killed);
     void destroy_task_threads();
 
     rust_sched_launcher *
-    create_task_thread(rust_sched_launcher_factory *launchfac, int id);
+    create_task_thread(rust_sched_launcher_factory *launchfac, int id,
+                       bool killed);
     void destroy_task_thread(rust_sched_launcher *thread);
 
     void exit();
 
 public:
     rust_scheduler(rust_kernel *kernel, size_t num_threads,
-                   rust_sched_id id, bool allow_exit,
+                   rust_sched_id id, bool allow_exit, bool killed,
                    rust_sched_launcher_factory *launchfac);
     ~rust_scheduler();
 
diff --git a/src/rt/rust_task.cpp b/src/rt/rust_task.cpp
index 84984d499e9..c28e3350bf2 100644
--- a/src/rt/rust_task.cpp
+++ b/src/rt/rust_task.cpp
@@ -257,8 +257,17 @@ rust_task::yield(bool *killed) {
 void
 rust_task::kill() {
     scoped_lock with(lifecycle_lock);
+    kill_inner();
+}
+
+void rust_task::kill_inner() {
+    lifecycle_lock.must_have_lock();
 
-    // XXX: bblum: kill/kill race
+    // Multiple kills should be able to safely race, but check anyway.
+    if (killed) {
+        LOG(this, task, "task %s @0x%" PRIxPTR " already killed", name, this);
+        return;
+    }
 
     // Note the distinction here: kill() is when you're in an upcall
     // from task A and want to force-fail task B, you do B->kill().
diff --git a/src/rt/rust_task.h b/src/rt/rust_task.h
index cae058d7227..e9538a5abd5 100644
--- a/src/rt/rust_task.h
+++ b/src/rt/rust_task.h
@@ -264,6 +264,7 @@ public:
 
     // Fail this task (assuming caller-on-stack is different task).
     void kill();
+    void kill_inner();
 
     // Indicates that we've been killed and now is an apropriate
     // time to fail as a result