1 files changed, 523 insertions, 0 deletions
diff --git a/src/libworkcache/lib.rs b/src/libworkcache/lib.rs
new file mode 100644
index 00000000000..eb63f2dacbb
--- /dev/null
+++ b/src/libworkcache/lib.rs
@@ -0,0 +1,523 @@
+// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#[crate_id = "workcache#0.10-pre"];
+#[crate_type = "rlib"];
+#[crate_type = "dylib"];
+#[license = "MIT/ASL2"];
+#[allow(deprecated_owned_vector, visible_private_types)];
+
+extern crate serialize;
+extern crate collections;
+extern crate sync;
+
+use serialize::json;
+use serialize::json::ToJson;
+use serialize::{Encoder, Encodable, Decoder, Decodable};
+use sync::{Arc,RWArc};
+use collections::TreeMap;
+use std::str;
+use std::io;
+use std::io::{File, MemWriter};
+
+/**
+*
+* This is a loose clone of the [fbuild build system](https://github.com/felix-lang/fbuild),
+* made a touch more generic (not wired to special cases on files) and much
+* less metaprogram-y due to rust's comparative weakness there, relative to
+* python.
+*
+* It's based around _imperative builds_ that happen to have some function
+* calls cached. That is, it's _just_ a mechanism for describing cached
+* functions. This makes it much simpler and smaller than a "build system"
+* that produces an IR and evaluates it. The evaluation order is normal
+* function calls. Some of them just return really quickly.
+*
+* A cached function consumes and produces a set of _works_. A work has a
+* name, a kind (that determines how the value is to be checked for
+* freshness) and a value. Works must also be (de)serializable. Some
+* examples of works:
+*
+*    kind   name    value
+*   ------------------------
+*    cfg    os      linux
+*    file   foo.c   <sha1>
+*    url    foo.com <etag>
+*
+* Works are conceptually single units, but we store them most of the time
+* in maps of the form (type,name) => value. These are WorkMaps.
+*
+* A cached function divides the works it's interested in into inputs and
+* outputs, and subdivides those into declared (input) works and
+* discovered (input and output) works.
+*
+* A _declared_ input or is one that is given to the workcache before
+* any work actually happens, in the "prep" phase. Even when a function's
+* work-doing part (the "exec" phase) never gets called, it has declared
+* inputs, which can be checked for freshness (and potentially
+* used to determine that the function can be skipped).
+*
+* The workcache checks _all_ works for freshness, but uses the set of
+* discovered outputs from the _previous_ exec (which it will re-discover
+* and re-record each time the exec phase runs).
+*
+* Therefore the discovered works cached in the db might be a
+* mis-approximation of the current discoverable works, but this is ok for
+* the following reason: we assume that if an artifact A changed from
+* depending on B,C,D to depending on B,C,D,E, then A itself changed (as
+* part of the change-in-dependencies), so we will be ok.
+*
+* Each function has a single discriminated output work called its _result_.
+* This is only different from other works in that it is returned, by value,
+* from a call to the cacheable function; the other output works are used in
+* passing to invalidate dependencies elsewhere in the cache, but do not
+* otherwise escape from a function invocation. Most functions only have one
+* output work anyways.
+*
+* A database (the central store of a workcache) stores a mappings:
+*
+* (fn_name,{declared_input}) => ({discovered_input},
+*                                {discovered_output},result)
+*
+* (Note: fbuild, which workcache is based on, has the concept of a declared
+* output as separate from a discovered output. This distinction exists only
+* as an artifact of how fbuild works: via annotations on function types
+* and metaprogramming, with explicit dependency declaration as a fallback.
+* Workcache is more explicit about dependencies, and as such treats all
+* outputs the same, as discovered-during-the-last-run.)
+*
+*/
+
+#[deriving(Clone, Eq, Encodable, Decodable, Ord, TotalOrd, TotalEq)]
+struct WorkKey {
+    kind: ~str,
+    name: ~str
+}
+
+impl WorkKey {
+    pub fn new(kind: &str, name: &str) -> WorkKey {
+        WorkKey {
+            kind: kind.to_owned(),
+            name: name.to_owned(),
+        }
+    }
+}
+
+// FIXME #8883: The key should be a WorkKey and not a ~str.
+// This is working around some JSON weirdness.
+#[deriving(Clone, Eq, Encodable, Decodable)]
+struct WorkMap(TreeMap<~str, KindMap>);
+
+#[deriving(Clone, Eq, Encodable, Decodable)]
+struct KindMap(TreeMap<~str, ~str>);
+
+impl WorkMap {
+    fn new() -> WorkMap { WorkMap(TreeMap::new()) }
+
+    fn insert_work_key(&mut self, k: WorkKey, val: ~str) {
+        let WorkKey { kind, name } = k;
+        let WorkMap(ref mut map) = *self;
+        match map.find_mut(&name) {
+            Some(&KindMap(ref mut m)) => { m.insert(kind, val); return; }
+            None => ()
+        }
+        let mut new_map = TreeMap::new();
+        new_map.insert(kind, val);
+        map.insert(name, KindMap(new_map));
+    }
+}
+
+pub struct Database {
+    priv db_filename: Path,
+    priv db_cache: TreeMap<~str, ~str>,
+    db_dirty: bool
+}
+
+impl Database {
+
+    pub fn new(p: Path) -> Database {
+        let mut rslt = Database {
+            db_filename: p,
+            db_cache: TreeMap::new(),
+            db_dirty: false
+        };
+        if rslt.db_filename.exists() {
+            rslt.load();
+        }
+        rslt
+    }
+
+    pub fn prepare(&self,
+                   fn_name: &str,
+                   declared_inputs: &WorkMap)
+                   -> Option<(WorkMap, WorkMap, ~str)> {
+        let k = json_encode(&(fn_name, declared_inputs));
+        match self.db_cache.find(&k) {
+            None => None,
+            Some(v) => Some(json_decode(*v))
+        }
+    }
+
+    pub fn cache(&mut self,
+                 fn_name: &str,
+                 declared_inputs: &WorkMap,
+                 discovered_inputs: &WorkMap,
+                 discovered_outputs: &WorkMap,
+                 result: &str) {
+        let k = json_encode(&(fn_name, declared_inputs));
+        let v = json_encode(&(discovered_inputs,
+                              discovered_outputs,
+                              result));
+        self.db_cache.insert(k,v);
+        self.db_dirty = true
+    }
+
+    // FIXME #4330: This should have &mut self and should set self.db_dirty to false.
+    fn save(&self) -> io::IoResult<()> {
+        let mut f = File::create(&self.db_filename);
+        self.db_cache.to_json().to_pretty_writer(&mut f)
+    }
+
+    fn load(&mut self) {
+        assert!(!self.db_dirty);
+        assert!(self.db_filename.exists());
+        match File::open(&self.db_filename) {
+            Err(e) => fail!("Couldn't load workcache database {}: {}",
+                            self.db_filename.display(),
+                            e),
+            Ok(mut stream) => {
+                match json::from_reader(&mut stream) {
+                    Err(e) => fail!("Couldn't parse workcache database (from file {}): {}",
+                                    self.db_filename.display(), e.to_str()),
+                    Ok(r) => {
+                        let mut decoder = json::Decoder::new(r);
+                        self.db_cache = Decodable::decode(&mut decoder);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[unsafe_destructor]
+impl Drop for Database {
+    fn drop(&mut self) {
+        if self.db_dirty {
+            // FIXME: is failing the right thing to do here
+            self.save().unwrap();
+        }
+    }
+}
+
+pub type FreshnessMap = TreeMap<~str,extern fn(&str,&str)->bool>;
+
+#[deriving(Clone)]
+pub struct Context {
+    db: RWArc<Database>,
+    priv cfg: Arc<json::Object>,
+    /// Map from kinds (source, exe, url, etc.) to a freshness function.
+    /// The freshness function takes a name (e.g. file path) and value
+    /// (e.g. hash of file contents) and determines whether it's up-to-date.
+    /// For example, in the file case, this would read the file off disk,
+    /// hash it, and return the result of comparing the given hash and the
+    /// read hash for equality.
+    priv freshness: Arc<FreshnessMap>
+}
+
+pub struct Prep<'a> {
+    priv ctxt: &'a Context,
+    priv fn_name: &'a str,
+    priv declared_inputs: WorkMap,
+}
+
+pub struct Exec {
+    priv discovered_inputs: WorkMap,
+    priv discovered_outputs: WorkMap
+}
+
+enum Work<'a, T> {
+    WorkValue(T),
+    WorkFromTask(&'a Prep<'a>, Receiver<(Exec, T)>),
+}
+
+fn json_encode<'a, T:Encodable<json::Encoder<'a>>>(t: &T) -> ~str {
+    let mut writer = MemWriter::new();
+    let mut encoder = json::Encoder::new(&mut writer as &mut io::Writer);
+    t.encode(&mut encoder);
+    str::from_utf8_owned(writer.unwrap()).unwrap()
+}
+
+// FIXME(#5121)
+fn json_decode<T:Decodable<json::Decoder>>(s: &str) -> T {
+    debug!("json decoding: {}", s);
+    let j = json::from_str(s).unwrap();
+    let mut decoder = json::Decoder::new(j);
+    Decodable::decode(&mut decoder)
+}
+
+impl Context {
+
+    pub fn new(db: RWArc<Database>,
+               cfg: Arc<json::Object>) -> Context {
+        Context::new_with_freshness(db, cfg, Arc::new(TreeMap::new()))
+    }
+
+    pub fn new_with_freshness(db: RWArc<Database>,
+                              cfg: Arc<json::Object>,
+                              freshness: Arc<FreshnessMap>) -> Context {
+        Context {
+            db: db,
+            cfg: cfg,
+            freshness: freshness
+        }
+    }
+
+    pub fn prep<'a>(&'a self, fn_name: &'a str) -> Prep<'a> {
+        Prep::new(self, fn_name)
+    }
+
+    pub fn with_prep<'a,
+                     T>(
+                     &'a self,
+                     fn_name: &'a str,
+                     blk: |p: &mut Prep| -> T)
+                     -> T {
+        let mut p = self.prep(fn_name);
+        blk(&mut p)
+    }
+
+}
+
+impl Exec {
+    pub fn discover_input(&mut self,
+                          dependency_kind: &str,
+                          dependency_name: &str,
+                          dependency_val: &str) {
+        debug!("Discovering input {} {} {}", dependency_kind, dependency_name, dependency_val);
+        self.discovered_inputs.insert_work_key(WorkKey::new(dependency_kind, dependency_name),
+                                 dependency_val.to_owned());
+    }
+    pub fn discover_output(&mut self,
+                           dependency_kind: &str,
+                           dependency_name: &str,
+                           dependency_val: &str) {
+        debug!("Discovering output {} {} {}", dependency_kind, dependency_name, dependency_val);
+        self.discovered_outputs.insert_work_key(WorkKey::new(dependency_kind, dependency_name),
+                                 dependency_val.to_owned());
+    }
+
+    // returns pairs of (kind, name)
+    pub fn lookup_discovered_inputs(&self) -> ~[(~str, ~str)] {
+        let mut rs = ~[];
+        let WorkMap(ref discovered_inputs) = self.discovered_inputs;
+        for (k, v) in discovered_inputs.iter() {
+            let KindMap(ref vmap) = *v;
+            for (k1, _) in vmap.iter() {
+                rs.push((k1.clone(), k.clone()));
+            }
+        }
+        rs
+    }
+}
+
+impl<'a> Prep<'a> {
+    fn new(ctxt: &'a Context, fn_name: &'a str) -> Prep<'a> {
+        Prep {
+            ctxt: ctxt,
+            fn_name: fn_name,
+            declared_inputs: WorkMap::new()
+        }
+    }
+
+    pub fn lookup_declared_inputs(&self) -> ~[~str] {
+        let mut rs = ~[];
+        let WorkMap(ref declared_inputs) = self.declared_inputs;
+        for (_, v) in declared_inputs.iter() {
+            let KindMap(ref vmap) = *v;
+            for (inp, _) in vmap.iter() {
+                rs.push(inp.clone());
+            }
+        }
+        rs
+    }
+}
+
+impl<'a> Prep<'a> {
+    pub fn declare_input(&mut self, kind: &str, name: &str, val: &str) {
+        debug!("Declaring input {} {} {}", kind, name, val);
+        self.declared_inputs.insert_work_key(WorkKey::new(kind, name),
+                                 val.to_owned());
+    }
+
+    fn is_fresh(&self, cat: &str, kind: &str,
+                name: &str, val: &str) -> bool {
+        let k = kind.to_owned();
+        let f = self.ctxt.freshness.get().find(&k);
+        debug!("freshness for: {}/{}/{}/{}", cat, kind, name, val)
+        let fresh = match f {
+            None => fail!("missing freshness-function for '{}'", kind),
+            Some(f) => (*f)(name, val)
+        };
+        if fresh {
+            info!("{} {}:{} is fresh", cat, kind, name);
+        } else {
+            info!("{} {}:{} is not fresh", cat, kind, name);
+        }
+        fresh
+    }
+
+    fn all_fresh(&self, cat: &str, map: &WorkMap) -> bool {
+        let WorkMap(ref map) = *map;
+        for (k_name, kindmap) in map.iter() {
+            let KindMap(ref kindmap_) = *kindmap;
+            for (k_kind, v) in kindmap_.iter() {
+               if ! self.is_fresh(cat, *k_kind, *k_name, *v) {
+                  return false;
+            }
+          }
+        }
+        return true;
+    }
+
+    pub fn exec<'a, T:Send +
+        Encodable<json::Encoder<'a>> +
+        Decodable<json::Decoder>>(
+            &'a self, blk: proc(&mut Exec) -> T) -> T {
+        self.exec_work(blk).unwrap()
+    }
+
+    fn exec_work<'a, T:Send +
+        Encodable<json::Encoder<'a>> +
+        Decodable<json::Decoder>>( // FIXME(#5121)
+            &'a self, blk: proc(&mut Exec) -> T) -> Work<'a, T> {
+        let mut bo = Some(blk);
+
+        debug!("exec_work: looking up {} and {:?}", self.fn_name,
+               self.declared_inputs);
+        let cached = self.ctxt.db.read(|db| {
+            db.prepare(self.fn_name, &self.declared_inputs)
+        });
+
+        match cached {
+            Some((ref disc_in, ref disc_out, ref res))
+            if self.all_fresh("declared input",&self.declared_inputs) &&
+               self.all_fresh("discovered input", disc_in) &&
+               self.all_fresh("discovered output", disc_out) => {
+                debug!("Cache hit!");
+                debug!("Trying to decode: {:?} / {:?} / {}",
+                       disc_in, disc_out, *res);
+                Work::from_value(json_decode(*res))
+            }
+
+            _ => {
+                debug!("Cache miss!");
+                let (tx, rx) = channel();
+                let blk = bo.take_unwrap();
+
+                // FIXME: What happens if the task fails?
+                spawn(proc() {
+                    let mut exe = Exec {
+                        discovered_inputs: WorkMap::new(),
+                        discovered_outputs: WorkMap::new(),
+                    };
+                    let v = blk(&mut exe);
+                    tx.send((exe, v));
+                });
+                Work::from_task(self, rx)
+            }
+        }
+    }
+}
+
+impl<'a, T:Send +
+       Encodable<json::Encoder<'a>> +
+       Decodable<json::Decoder>>
+    Work<'a, T> { // FIXME(#5121)
+
+    pub fn from_value(elt: T) -> Work<'a, T> {
+        WorkValue(elt)
+    }
+    pub fn from_task(prep: &'a Prep<'a>, port: Receiver<(Exec, T)>)
+        -> Work<'a, T> {
+        WorkFromTask(prep, port)
+    }
+
+    pub fn unwrap(self) -> T {
+        match self {
+            WorkValue(v) => v,
+            WorkFromTask(prep, port) => {
+                let (exe, v) = port.recv();
+                let s = json_encode(&v);
+                prep.ctxt.db.write(|db| {
+                    db.cache(prep.fn_name,
+                             &prep.declared_inputs,
+                             &exe.discovered_inputs,
+                             &exe.discovered_outputs,
+                             s)
+                });
+                v
+            }
+        }
+    }
+}
+
+
+#[test]
+#[cfg(not(target_os="android"))] // FIXME(#10455)
+fn test() {
+    use std::os;
+    use std::io::{fs, Process};
+    use std::str::from_utf8_owned;
+
+    // Create a path to a new file 'filename' in the directory in which
+    // this test is running.
+    fn make_path(filename: ~str) -> Path {
+        let pth = os::self_exe_path().expect("workcache::test failed").with_filename(filename);
+        if pth.exists() {
+            fs::unlink(&pth).unwrap();
+        }
+        return pth;
+    }
+
+    let pth = make_path(~"foo.c");
+    File::create(&pth).write(bytes!("int main() { return 0; }")).unwrap();
+
+    let db_path = make_path(~"db.json");
+
+    let cx = Context::new(RWArc::new(Database::new(db_path)),
+                          Arc::new(TreeMap::new()));
+
+    let s = cx.with_prep("test1", |prep| {
+
+        let subcx = cx.clone();
+        let pth = pth.clone();
+
+        let contents = File::open(&pth).read_to_end().unwrap();
+        let file_content = from_utf8_owned(contents).unwrap();
+
+        // FIXME (#9639): This needs to handle non-utf8 paths
+        prep.declare_input("file", pth.as_str().unwrap(), file_content);
+        prep.exec(proc(_exe) {
+            let out = make_path(~"foo.o");
+            // FIXME (#9639): This needs to handle non-utf8 paths
+            Process::status("gcc", [pth.as_str().unwrap().to_owned(),
+                                    ~"-o",
+                                    out.as_str().unwrap().to_owned()]).unwrap();
+
+            let _proof_of_concept = subcx.prep("subfn");
+            // Could run sub-rules inside here.
+
+            // FIXME (#9639): This needs to handle non-utf8 paths
+            out.as_str().unwrap().to_owned()
+        })
+    });
+
+    println!("{}", s);
+}