diff options
| author | Niko Matsakis <niko@alum.mit.edu> | 2016-05-06 15:09:31 -0400 |
|---|---|---|
| committer | Niko Matsakis <niko@alum.mit.edu> | 2016-05-18 10:11:36 -0400 |
| commit | 3a2edd7e613638a61bf70499e4c225c2fec36a5d (patch) | |
| tree | c90f8d2a327fe22588152103f18ddcb28dd588d7 | |
| parent | b01919a1443615f2ee78f91515e8f01dc2591177 (diff) | |
| download | rust-3a2edd7e613638a61bf70499e4c225c2fec36a5d.tar.gz rust-3a2edd7e613638a61bf70499e4c225c2fec36a5d.zip | |
load/save hashes of metadata
This commit reorganizes how the persist code treats hashing. The idea is that each crate saves a file containing hashes representing the metadata for each item X. When we see a read from `MetaData(X)`, we can load this hash up (if we don't find a file for that crate, we just use the SVH for the entire crate). To compute the hash for `MetaData(Y)`, where Y is some local item, we examine all the predecessors of the `MetaData(Y)` node and hash their hashes together.
| -rw-r--r-- | src/librustc_incremental/persist/data.rs | 43 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/directory.rs | 19 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/hash.rs | 158 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/load.rs | 5 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/mod.rs | 1 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/save.rs | 94 | ||||
| -rw-r--r-- | src/librustc_incremental/persist/util.rs | 21 |
7 files changed, 249 insertions, 92 deletions
diff --git a/src/librustc_incremental/persist/data.rs b/src/librustc_incremental/persist/data.rs index 5c68552b718..f57ab19a525 100644 --- a/src/librustc_incremental/persist/data.rs +++ b/src/librustc_incremental/persist/data.rs @@ -11,6 +11,7 @@ //! The data that we will serialize and deserialize. use rustc::dep_graph::DepNode; +use rustc::hir::def_id::DefIndex; use super::directory::DefPathIndex; @@ -34,30 +35,56 @@ pub struct SerializedDepGraph { /// compare them against the hashes we see at that time, which /// will tell us what has changed, either in this crate or in some /// crate that we depend on. + /// + /// Because they will be reloaded, we don't store the DefId (which + /// will be different when we next compile) related to each node, + /// but rather the `DefPathIndex`. This can then be retraced + /// to find the current def-id. pub hashes: Vec<SerializedHash>, } +pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>); + +#[derive(Debug, RustcEncodable, RustcDecodable)] +pub struct SerializedHash { + /// node being hashed; either a Hir or MetaData variant, in + /// practice + pub node: DepNode<DefPathIndex>, + + /// the hash itself, computed by `calculate_item_hash` + pub hash: u64, +} + /// Data for use when downstream crates get recompiled. #[derive(Debug, RustcEncodable, RustcDecodable)] pub struct SerializedMetadataHashes { /// For each def-id defined in this crate that appears in the /// metadata, we hash all the inputs that were used when producing - /// the metadata. We save this after compilation is done. Then, + /// the metadata. We save this after compilation is done. Then, /// when some downstream crate is being recompiled, it can compare /// the hashes we saved against the hashes that it saw from /// before; this will tell it which of the items in this crate /// changed, which in turn implies what items in the downstream /// crate need to be recompiled. 
- pub hashes: Vec<SerializedHash>, + /// + /// Note that we store the def-ids here. This is because we don't + /// reload this file when we recompile this crate, we will just + /// regenerate it completely with the current hashes and new def-ids. + /// + /// Then downstream crates will load up their + /// `SerializedDepGraph`, which may contain `MetaData(X)` nodes + /// where `X` refers to some item in this crate. That `X` will be + /// a `DefPathIndex` that gets retraced to the current `DefId` + /// (matching the one found in this structure). + pub hashes: Vec<SerializedMetadataHash>, } -pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>); - +/// The hash for some metadata that (when saving) will be exported +/// from this crate, or which (when importing) was exported by an +/// upstream crate. #[derive(Debug, RustcEncodable, RustcDecodable)] -pub struct SerializedHash { - /// node being hashed; either a Hir or MetaData variant, in - /// practice - pub node: DepNode<DefPathIndex>, +pub struct SerializedMetadataHash { + pub def_index: DefIndex, /// the hash itself, computed by `calculate_item_hash` pub hash: u64, diff --git a/src/librustc_incremental/persist/directory.rs b/src/librustc_incremental/persist/directory.rs index 07753158753..f9e90f39321 100644 --- a/src/librustc_incremental/persist/directory.rs +++ b/src/librustc_incremental/persist/directory.rs @@ -64,7 +64,7 @@ impl RetracedDefIdDirectory { pub struct DefIdDirectoryBuilder<'a,'tcx:'a> { tcx: TyCtxt<'a, 'tcx, 'tcx>, - hash: DefIdMap<Option<DefPathIndex>>, + hash: DefIdMap<DefPathIndex>, directory: DefIdDirectory, } @@ -77,29 +77,22 @@ impl<'a,'tcx> DefIdDirectoryBuilder<'a,'tcx> { } } - pub fn add(&mut self, def_id: DefId) -> Option<DefPathIndex> { - if !def_id.is_local() { - // FIXME(#32015) clarify story about cross-crate dep tracking - return None; - } - + pub fn add(&mut self, def_id: DefId) -> DefPathIndex { + debug!("DefIdDirectoryBuilder: def_id={:?}", def_id); let tcx = 
self.tcx; let paths = &mut self.directory.paths; self.hash.entry(def_id) .or_insert_with(|| { let def_path = tcx.def_path(def_id); - if !def_path.is_local() { - return None; - } let index = paths.len() as u32; paths.push(def_path); - Some(DefPathIndex { index: index }) + DefPathIndex { index: index } }) .clone() } - pub fn map(&mut self, node: DepNode<DefId>) -> Option<DepNode<DefPathIndex>> { - node.map_def(|&def_id| self.add(def_id)) + pub fn map(&mut self, node: DepNode<DefId>) -> DepNode<DefPathIndex> { + node.map_def(|&def_id| Some(self.add(def_id))).unwrap() } pub fn into_directory(self) -> DefIdDirectory { diff --git a/src/librustc_incremental/persist/hash.rs b/src/librustc_incremental/persist/hash.rs new file mode 100644 index 00000000000..c36b9ed0d26 --- /dev/null +++ b/src/librustc_incremental/persist/hash.rs @@ -0,0 +1,158 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use calculate_svh::SvhCalculate; +use rbml::Error; +use rbml::opaque::Decoder; +use rustc::dep_graph::DepNode; +use rustc::hir::def_id::DefId; +use rustc::hir::svh::Svh; +use rustc::ty::TyCtxt; +use rustc_data_structures::fnv::FnvHashMap; +use rustc_serialize::Decodable; +use std::io::{ErrorKind, Read}; +use std::fs::File; +use syntax::ast; + +use super::data::*; +use super::util::*; + +pub struct HashContext<'a, 'tcx: 'a> { + pub tcx: TyCtxt<'a, 'tcx, 'tcx>, + item_metadata_hashes: FnvHashMap<DefId, u64>, + crate_hashes: FnvHashMap<ast::CrateNum, Svh>, +} + +impl<'a, 'tcx> HashContext<'a, 'tcx> { + pub fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self { + HashContext { + tcx: tcx, + item_metadata_hashes: FnvHashMap(), + crate_hashes: FnvHashMap(), + } + } + + pub fn hash(&mut self, dep_node: DepNode<DefId>) -> Option<u64> { + match dep_node { + // HIR nodes (which always come from our crate) are an input: + DepNode::Hir(def_id) => { + assert!(def_id.is_local()); + Some(self.hir_hash(def_id)) + } + + // MetaData from other crates is an *input* to us. + // MetaData nodes from *our* crates are an *output*; we + // don't hash them, but we do compute a hash for them and + // save it for others to use. 
+ DepNode::MetaData(def_id) if !def_id.is_local() => { + Some(self.metadata_hash(def_id)) + } + + _ => { + // Other kinds of nodes represent computed by-products + // that we don't hash directly; instead, they should + // have some transitive dependency on a Hir or + // MetaData node, so we'll just hash that + None + } + } + } + + fn hir_hash(&mut self, def_id: DefId) -> u64 { + assert!(def_id.is_local()); + // FIXME(#32753) -- should we use a distinct hash here + self.tcx.calculate_item_hash(def_id) + } + + fn metadata_hash(&mut self, def_id: DefId) -> u64 { + debug!("metadata_hash(def_id={:?})", def_id); + + assert!(!def_id.is_local()); + loop { + // check whether we have a result cached for this def-id + if let Some(&hash) = self.item_metadata_hashes.get(&def_id) { + debug!("metadata_hash: def_id={:?} hash={:?}", def_id, hash); + return hash; + } + + // check whether we did not find detailed metadata for this + // krate; in that case, we just use the krate's overall hash + if let Some(&hash) = self.crate_hashes.get(&def_id.krate) { + debug!("metadata_hash: def_id={:?} crate_hash={:?}", def_id, hash); + return hash.as_u64(); + } + + // otherwise, load the data and repeat. 
+ self.load_data(def_id.krate); + assert!(self.crate_hashes.contains_key(&def_id.krate)); + } + } + + fn load_data(&mut self, cnum: ast::CrateNum) { + debug!("load_data(cnum={})", cnum); + + let svh = self.tcx.sess.cstore.crate_hash(cnum); + let old = self.crate_hashes.insert(cnum, svh); + debug!("load_data: svh={}", svh); + assert!(old.is_none(), "loaded data for crate {:?} twice", cnum); + + if let Some(path) = metadata_hash_path(self.tcx, cnum) { + debug!("load_data: path={:?}", path); + let mut data = vec![]; + match + File::open(&path) + .and_then(|mut file| file.read_to_end(&mut data)) + { + Ok(_) => { + match self.load_from_data(cnum, &data) { + Ok(()) => { } + Err(err) => { + bug!("decoding error in dep-graph from `{}`: {}", + path.display(), err); + } + } + } + Err(err) => { + match err.kind() { + ErrorKind::NotFound => { + // If the file is not found, that's ok. + } + _ => { + self.tcx.sess.err( + &format!("could not load dep information from `{}`: {}", + path.display(), err)); + return; + } + } + } + } + } + } + + fn load_from_data(&mut self, cnum: ast::CrateNum, data: &[u8]) -> Result<(), Error> { + debug!("load_from_data(cnum={})", cnum); + + // Load up the hashes for the def-ids from this crate. 
+ let mut decoder = Decoder::new(data, 0); + let serialized_hashes = try!(SerializedMetadataHashes::decode(&mut decoder)); + for serialized_hash in serialized_hashes.hashes { + // the hashes are stored with just a def-index, which is + // always relative to the old crate; convert that to use + // our internal crate number + let def_id = DefId { krate: cnum, index: serialized_hash.def_index }; + + // record the hash for this dep-node + let old = self.item_metadata_hashes.insert(def_id, serialized_hash.hash); + debug!("load_from_data: def_id={:?} hash={}", def_id, serialized_hash.hash); + assert!(old.is_none(), "already have hash for {:?}", def_id); + } + Ok(()) + } +} diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs index 35ef0917517..e3fd290443c 100644 --- a/src/librustc_incremental/persist/load.rs +++ b/src/librustc_incremental/persist/load.rs @@ -24,6 +24,7 @@ use std::path::Path; use super::data::*; use super::directory::*; use super::dirty_clean; +use super::hash::*; use super::util::*; type DirtyNodes = FnvHashSet<DepNode<DefId>>; @@ -133,13 +134,13 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, hashes: &[SerializedHash], retraced: &RetracedDefIdDirectory) -> DirtyNodes { + let mut hcx = HashContext::new(tcx); let mut items_removed = false; let mut dirty_nodes = FnvHashSet(); for hash in hashes { match hash.node.map_def(|&i| retraced.def_id(i)) { Some(dep_node) => { - // FIXME(#32753) -- should we use a distinct hash here - let current_hash = dep_node.hash(tcx).unwrap(); + let current_hash = hcx.hash(dep_node).unwrap(); debug!("initial_dirty_nodes: hash of {:?} is {:?}, was {:?}", dep_node, current_hash, hash.hash); if current_hash != hash.hash { diff --git a/src/librustc_incremental/persist/mod.rs b/src/librustc_incremental/persist/mod.rs index 8d04fd30a19..72ccc29c97b 100644 --- a/src/librustc_incremental/persist/mod.rs +++ b/src/librustc_incremental/persist/mod.rs @@ -15,6 +15,7 @@ mod data; 
mod directory; mod dirty_clean; +mod hash; mod load; mod save; mod util; diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs index 868f2ee4244..7deb1ca36db 100644 --- a/src/librustc_incremental/persist/save.rs +++ b/src/librustc_incremental/persist/save.rs @@ -20,18 +20,23 @@ use std::path::PathBuf; use super::data::*; use super::directory::*; +use super::hash::*; use super::util::*; pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) { let _ignore = tcx.dep_graph.in_ignore(); - - save_in(tcx, dep_graph_path(tcx), encode_dep_graph); - save_in(tcx, metadata_hash_path(tcx, LOCAL_CRATE), encode_metadata_hashes); + let mut hcx = HashContext::new(tcx); + save_in(&mut hcx, dep_graph_path(tcx), encode_dep_graph); + save_in(&mut hcx, metadata_hash_path(tcx, LOCAL_CRATE), encode_metadata_hashes); } -fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf>, encode: F) - where F: FnOnce(TyCtxt<'a, 'tcx, 'tcx>, &mut Encoder) -> io::Result<()> +fn save_in<'a, 'tcx, F>(hcx: &mut HashContext<'a, 'tcx>, + opt_path_buf: Option<PathBuf>, + encode: F) + where F: FnOnce(&mut HashContext<'a, 'tcx>, &mut Encoder) -> io::Result<()> { + let tcx = hcx.tcx; + let path_buf = match opt_path_buf { Some(p) => p, None => return @@ -54,7 +59,7 @@ fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf> // generate the data in a memory buffer let mut wr = Cursor::new(Vec::new()); - match encode(tcx, &mut Encoder::new(&mut wr)) { + match encode(hcx, &mut Encoder::new(&mut wr)) { Ok(()) => { } Err(err) => { tcx.sess.err( @@ -80,9 +85,11 @@ fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf> } } -pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, +pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, encoder: &mut Encoder) - -> io::Result<()> { + -> io::Result<()> +{ + let tcx = hcx.tcx; let query = tcx.dep_graph.query(); let mut builder = 
DefIdDirectoryBuilder::new(tcx); @@ -92,29 +99,24 @@ pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, query.nodes() .into_iter() .filter_map(|dep_node| { - dep_node.hash(tcx) - .map(|hash| { - let node = builder.map(dep_node).unwrap(); - SerializedHash { node: node, hash: hash } - }) + hcx.hash(dep_node) + .map(|hash| { + let node = builder.map(dep_node); + SerializedHash { node: node, hash: hash } + }) }) .collect(); - // Create the serialized dep-graph, dropping nodes that are - // from other crates or from inlined items. - // - // FIXME(#32015) fix handling of other crates + // Create the serialized dep-graph. let graph = SerializedDepGraph { nodes: query.nodes().into_iter() - .flat_map(|node| builder.map(node)) + .map(|node| builder.map(node)) .collect(), edges: query.edges().into_iter() - .flat_map(|(source_node, target_node)| { - builder.map(source_node) - .and_then(|source| { - builder.map(target_node) - .map(|target| (source, target)) - }) + .map(|(source_node, target_node)| { + let source = builder.map(source_node); + let target = builder.map(target_node); + (source, target) }) .collect(), hashes: hashes, @@ -130,14 +132,13 @@ pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, Ok(()) } -pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, +pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>, encoder: &mut Encoder) -> io::Result<()> { + let tcx = hcx.tcx; let query = tcx.dep_graph.query(); - let mut builder = DefIdDirectoryBuilder::new(tcx); - let serialized_hashes = { // Identify the `MetaData(X)` nodes where `X` is local. These are // the metadata items we export. Downstream crates will want to @@ -152,32 +153,31 @@ pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, }); // To create the hash for each item `X`, we don't hash the raw - // bytes of the metadata (though in principle we could). Instead, - // we walk the predecessors of `MetaData(X)` from the - // dep-graph. 
This corresponds to all the inputs that were read to - // construct the metadata. To create the hash for the metadata, we - // hash (the hash of) all of those inputs. + // bytes of the metadata (though in principle we + // could). Instead, we walk the predecessors of `MetaData(X)` + // from the dep-graph. This corresponds to all the inputs that + // were read to construct the metadata. To create the hash for + // the metadata, we hash (the hash of) all of those inputs. let hashes = meta_data_def_ids .map(|def_id| { + assert!(def_id.is_local()); + let dep_node = DepNode::MetaData(def_id); let mut state = SipHasher::new(); - for node in query.transitive_predecessors(DepNode::MetaData(def_id)) { - if let Some(hash) = node.hash(tcx) { + debug!("save: computing metadata hash for {:?}", dep_node); + for node in query.transitive_predecessors(dep_node) { + if let Some(hash) = hcx.hash(node) { + debug!("save: predecessor {:?} has hash {}", node, hash); state.write_u64(hash.to_le()); + } else { + debug!("save: predecessor {:?} cannot be hashed", node); } } - (def_id, state.finish()) - }); - - // Now create the `SerializedHash` data structures that others - // will load later. - let hashes = - hashes - .map(|(def_id, hash)| { - let index = builder.add(def_id).unwrap(); - SerializedHash { - node: DepNode::MetaData(index), - hash: hash + let hash = state.finish(); + debug!("save: metadata hash for {:?} is {}", dep_node, hash); + SerializedMetadataHash { + def_index: def_id.index, + hash: hash, } }); @@ -188,8 +188,6 @@ pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, }; // Encode everything. 
- let directory = builder.into_directory(); - try!(directory.encode(encoder)); try!(serialized_hashes.encode(encoder)); Ok(()) diff --git a/src/librustc_incremental/persist/util.rs b/src/librustc_incremental/persist/util.rs index 7acfdb5fffe..a77a9607e77 100644 --- a/src/librustc_incremental/persist/util.rs +++ b/src/librustc_incremental/persist/util.rs @@ -8,9 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use calculate_svh::SvhCalculate; -use rustc::dep_graph::DepNode; -use rustc::hir::def_id::DefId; use rustc::middle::cstore::LOCAL_CRATE; use rustc::ty::TyCtxt; @@ -72,21 +69,3 @@ fn create_dir_racy(path: &Path) -> io::Result<()> { } } -pub trait DepNodeHash { - /// Hash this dep-node, if it is of the kind that we know how to - /// hash. - fn hash<'a, 'tcx>(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<u64>; -} - -impl DepNodeHash for DepNode<DefId> { - fn hash<'a, 'tcx>(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<u64> { - match *self { - DepNode::Hir(def_id) => { - // FIXME(#32753) -- should we use a distinct hash here - assert!(def_id.is_local()); - Some(tcx.calculate_item_hash(def_id)) - } - _ => None - } - } -} |
