about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2021-09-06 23:58:16 +0000
committerbors <bors@rust-lang.org>2021-09-06 23:58:16 +0000
commit11bbb5231349a0a144d86d5c0c21061a06d1969d (patch)
tree70bcc7e4fddcbc40043ae83f99c9955dce8a8f55
parent1698e3cac54aa8691d4e9e207567672af8231cb6 (diff)
parentbcefd487c380b113d81ac066ea9b3b4b65e9efe7 (diff)
downloadrust-11bbb5231349a0a144d86d5c0c21061a06d1969d.tar.gz
rust-11bbb5231349a0a144d86d5c0c21061a06d1969d.zip
Auto merge of #83214 - cjgillot:dep-map, r=michaelwoerister
Mmap the incremental data instead of reading it.

Instead of reading the full incremental state using `fs::read_file`, we memmap it using a private read-only file-backed map.
This allows the system to reclaim any memory we are not using, while ensuring we are not polluted by
outside modifications to the file.

Suggested in https://github.com/rust-lang/rust/pull/83036#issuecomment-800458082 by `@bjorn3`
-rw-r--r--compiler/rustc_incremental/src/persist/file_format.rs78
-rw-r--r--compiler/rustc_incremental/src/persist/load.rs3
-rw-r--r--compiler/rustc_incremental/src/persist/save.rs67
-rw-r--r--compiler/rustc_middle/src/ty/context.rs5
-rw-r--r--compiler/rustc_query_impl/src/on_disk_cache.rs58
5 files changed, 123 insertions, 88 deletions
diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs
index b821ed6cff9..572a4fc6971 100644
--- a/compiler/rustc_incremental/src/persist/file_format.rs
+++ b/compiler/rustc_incremental/src/persist/file_format.rs
@@ -12,10 +12,12 @@
 use std::env;
 use std::fs;
 use std::io::{self, Read};
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
+use rustc_data_structures::memmap::Mmap;
 use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
 use rustc_serialize::Encoder;
+use rustc_session::Session;
 
 /// The first few bytes of files generated by incremental compilation.
 const FILE_MAGIC: &[u8] = b"RSIC";
@@ -28,7 +30,7 @@ const HEADER_FORMAT_VERSION: u16 = 0;
 /// the Git commit hash.
 const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");
 
-pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
+pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
     stream.emit_raw_bytes(FILE_MAGIC)?;
     stream.emit_raw_bytes(&[
         (HEADER_FORMAT_VERSION >> 0) as u8,
@@ -41,6 +43,61 @@ pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileE
     stream.emit_raw_bytes(rustc_version.as_bytes())
 }
 
+pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
+where
+    F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
+{
+    debug!("save: storing data in {}", path_buf.display());
+
+    // Delete the old file, if any.
+    // Note: It's important that we actually delete the old file and not just
+    // truncate and overwrite it, since it might be a shared hard-link, the
+    // underlying data of which we don't want to modify.
+    //
+    // We have to ensure we have dropped the memory maps to this file
+    // before performing this removal.
+    match fs::remove_file(&path_buf) {
+        Ok(()) => {
+            debug!("save: remove old file");
+        }
+        Err(err) if err.kind() == io::ErrorKind::NotFound => (),
+        Err(err) => {
+            sess.err(&format!(
+                "unable to delete old {} at `{}`: {}",
+                name,
+                path_buf.display(),
+                err
+            ));
+            return;
+        }
+    }
+
+    let mut encoder = match FileEncoder::new(&path_buf) {
+        Ok(encoder) => encoder,
+        Err(err) => {
+            sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
+            return;
+        }
+    };
+
+    if let Err(err) = write_file_header(&mut encoder, sess.is_nightly_build()) {
+        sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
+        return;
+    }
+
+    if let Err(err) = encode(&mut encoder) {
+        sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
+        return;
+    }
+
+    if let Err(err) = encoder.flush() {
+        sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
+        return;
+    }
+
+    debug!("save: data written to disk successfully");
+}
+
 /// Reads the contents of a file with a file header as defined in this module.
 ///
 /// - Returns `Ok(Some(data, pos))` if the file existed and was generated by a
@@ -54,14 +111,21 @@ pub fn read_file(
     report_incremental_info: bool,
     path: &Path,
     nightly_build: bool,
-) -> io::Result<Option<(Vec<u8>, usize)>> {
-    let data = match fs::read(path) {
-        Ok(data) => data,
+) -> io::Result<Option<(Mmap, usize)>> {
+    let file = match fs::File::open(path) {
+        Ok(file) => file,
         Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
         Err(err) => return Err(err),
     };
+    // SAFETY: This process must not modify nor remove the backing file while the memory map lives.
+    // For the dep-graph and the work product index, it is as soon as the decoding is done.
+    // For the query result cache, the memory map is dropped in save_dep_graph before calling
+    // save_in and trying to remove the backing file.
+    //
+    // There is no way to prevent another process from modifying this file.
+    let mmap = unsafe { Mmap::map(file) }?;
 
-    let mut file = io::Cursor::new(data);
+    let mut file = io::Cursor::new(&*mmap);
 
     // Check FILE_MAGIC
     {
@@ -103,7 +167,7 @@ pub fn read_file(
     }
 
     let post_header_start_pos = file.position() as usize;
-    Ok(Some((file.into_inner(), post_header_start_pos)))
+    Ok(Some((mmap, post_header_start_pos)))
 }
 
 fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {
diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs
index 437d5596447..4d38556e5d2 100644
--- a/compiler/rustc_incremental/src/persist/load.rs
+++ b/compiler/rustc_incremental/src/persist/load.rs
@@ -1,6 +1,7 @@
 //! Code to save/load the dep-graph from files.
 
 use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::memmap::Mmap;
 use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId};
 use rustc_middle::ty::OnDiskCache;
 use rustc_serialize::opaque::Decoder;
@@ -48,7 +49,7 @@ fn load_data(
     report_incremental_info: bool,
     path: &Path,
     nightly_build: bool,
-) -> LoadResult<(Vec<u8>, usize)> {
+) -> LoadResult<(Mmap, usize)> {
     match file_format::read_file(report_incremental_info, path, nightly_build) {
         Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
         Ok(None) => {
diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs
index a8455854ebb..6c683058b12 100644
--- a/compiler/rustc_incremental/src/persist/save.rs
+++ b/compiler/rustc_incremental/src/persist/save.rs
@@ -6,8 +6,6 @@ use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
 use rustc_serialize::Encodable as RustcEncodable;
 use rustc_session::Session;
 use std::fs;
-use std::io;
-use std::path::PathBuf;
 
 use super::data::*;
 use super::dirty_clean;
@@ -44,7 +42,14 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
         join(
             move || {
                 sess.time("incr_comp_persist_result_cache", || {
-                    save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e));
+                    // Drop the memory map so that we can remove the file and write to it.
+                    if let Some(odc) = &tcx.on_disk_cache {
+                        odc.drop_serialized_data(tcx);
+                    }
+
+                    file_format::save_in(sess, query_cache_path, "query cache", |e| {
+                        encode_query_cache(tcx, e)
+                    });
                 });
             },
             move || {
@@ -86,7 +91,9 @@ pub fn save_work_product_index(
     debug!("save_work_product_index()");
     dep_graph.assert_ignored();
     let path = work_products_path(sess);
-    save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e));
+    file_format::save_in(sess, path, "work product index", |e| {
+        encode_work_product_index(&new_work_products, e)
+    });
 
     // We also need to clean out old work-products, as not all of them are
     // deleted during invalidation. Some object files don't change their
@@ -113,58 +120,6 @@ pub fn save_work_product_index(
     });
 }
 
-pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
-where
-    F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
-{
-    debug!("save: storing data in {}", path_buf.display());
-
-    // Delete the old file, if any.
-    // Note: It's important that we actually delete the old file and not just
-    // truncate and overwrite it, since it might be a shared hard-link, the
-    // underlying data of which we don't want to modify
-    match fs::remove_file(&path_buf) {
-        Ok(()) => {
-            debug!("save: remove old file");
-        }
-        Err(err) if err.kind() == io::ErrorKind::NotFound => (),
-        Err(err) => {
-            sess.err(&format!(
-                "unable to delete old {} at `{}`: {}",
-                name,
-                path_buf.display(),
-                err
-            ));
-            return;
-        }
-    }
-
-    let mut encoder = match FileEncoder::new(&path_buf) {
-        Ok(encoder) => encoder,
-        Err(err) => {
-            sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
-            return;
-        }
-    };
-
-    if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) {
-        sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
-        return;
-    }
-
-    if let Err(err) = encode(&mut encoder) {
-        sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
-        return;
-    }
-
-    if let Err(err) = encoder.flush() {
-        sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
-        return;
-    }
-
-    debug!("save: data written to disk successfully");
-}
-
 fn encode_work_product_index(
     work_products: &FxHashMap<WorkProductId, WorkProduct>,
     encoder: &mut FileEncoder,
diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs
index de7c6d9e095..1f5057d1da2 100644
--- a/compiler/rustc_middle/src/ty/context.rs
+++ b/compiler/rustc_middle/src/ty/context.rs
@@ -27,6 +27,7 @@ use crate::ty::{
 use rustc_ast as ast;
 use rustc_attr as attr;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_data_structures::memmap::Mmap;
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap};
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@@ -71,7 +72,7 @@ use std::sync::Arc;
 
 pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
     /// Creates a new `OnDiskCache` instance from the serialized data in `data`.
-    fn new(sess: &'tcx Session, data: Vec<u8>, start_pos: usize) -> Self
+    fn new(sess: &'tcx Session, data: Mmap, start_pos: usize) -> Self
     where
         Self: Sized;
 
@@ -100,6 +101,8 @@ pub trait OnDiskCache<'tcx>: rustc_data_structures::sync::Sync {
     fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode);
     fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash);
 
+    fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>);
+
     fn serialize(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult;
 }
 
diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs
index ee64f22618e..5c2803c67e7 100644
--- a/compiler/rustc_query_impl/src/on_disk_cache.rs
+++ b/compiler/rustc_query_impl/src/on_disk_cache.rs
@@ -1,6 +1,7 @@
 use crate::QueryCtxt;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
-use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell};
+use rustc_data_structures::memmap::Mmap;
+use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, OnceCell, RwLock};
 use rustc_data_structures::unhash::UnhashMap;
 use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE};
 use rustc_hir::definitions::DefPathHash;
@@ -42,7 +43,7 @@ const TAG_EXPN_DATA: u8 = 1;
 /// any side effects that have been emitted during a query.
 pub struct OnDiskCache<'sess> {
     // The complete cache data in serialized form.
-    serialized_data: Vec<u8>,
+    serialized_data: RwLock<Option<Mmap>>,
 
     // Collects all `QuerySideEffects` created during the current compilation
     // session.
@@ -182,7 +183,8 @@ impl EncodedSourceFileId {
 }
 
 impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
-    fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
+    /// Creates a new `OnDiskCache` instance from the serialized data in `data`.
+    fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
         debug_assert!(sess.opts.incremental.is_some());
 
         // Wrap in a scope so we can borrow `data`.
@@ -204,7 +206,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
         };
 
         Self {
-            serialized_data: data,
+            serialized_data: RwLock::new(Some(data)),
             file_index_to_stable_id: footer.file_index_to_stable_id,
             file_index_to_file: Default::default(),
             cnum_map: OnceCell::new(),
@@ -225,7 +227,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
 
     fn new_empty(source_map: &'sess SourceMap) -> Self {
         Self {
-            serialized_data: Vec::new(),
+            serialized_data: RwLock::new(None),
             file_index_to_stable_id: Default::default(),
             file_index_to_file: Default::default(),
             cnum_map: OnceCell::new(),
@@ -244,7 +246,31 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
         }
     }
 
-    fn serialize(&self, tcx: TyCtxt<'sess>, encoder: &mut FileEncoder) -> FileEncodeResult {
+    /// Execute all cache promotions and release the serialized backing Mmap.
+    ///
+    /// Cache promotions require invoking queries, which needs to read the serialized data.
+    /// In order to serialize the new on-disk cache, the former on-disk cache file needs to be
+    /// deleted, hence we won't be able to refer to its memmapped data.
+    fn drop_serialized_data(&self, tcx: TyCtxt<'tcx>) {
+        // Register any dep nodes that we reused from the previous session,
+        // but didn't `DepNode::construct` in this session. This ensures
+        // that their `DefPathHash` to `RawDefId` mappings are registered
+        // in 'latest_foreign_def_path_hashes' if necessary, since that
+        // normally happens in `DepNode::construct`.
+        tcx.dep_graph.register_reused_dep_nodes(tcx);
+
+        // Load everything into memory so we can write it out to the on-disk
+        // cache. The vast majority of cacheable query results should already
+        // be in memory, so this should be a cheap operation.
+        // Do this *before* we clone 'latest_foreign_def_path_hashes', since
+        // loading existing queries may cause us to create new DepNodes, which
+        // may in turn end up invoking `store_foreign_def_id_hash`
+        tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));
+
+        *self.serialized_data.write() = None;
+    }
+
+    fn serialize<'tcx>(&self, tcx: TyCtxt<'tcx>, encoder: &mut FileEncoder) -> FileEncodeResult {
         // Serializing the `DepGraph` should not modify it.
         tcx.dep_graph.with_ignore(|| {
             // Allocate `SourceFileIndex`es.
@@ -266,21 +292,6 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
                 (file_to_file_index, file_index_to_stable_id)
             };
 
-            // Register any dep nodes that we reused from the previous session,
-            // but didn't `DepNode::construct` in this session. This ensures
-            // that their `DefPathHash` to `RawDefId` mappings are registered
-            // in 'latest_foreign_def_path_hashes' if necessary, since that
-            // normally happens in `DepNode::construct`.
-            tcx.dep_graph.register_reused_dep_nodes(tcx);
-
-            // Load everything into memory so we can write it out to the on-disk
-            // cache. The vast majority of cacheable query results should already
-            // be in memory, so this should be a cheap operation.
-            // Do this *before* we clone 'latest_foreign_def_path_hashes', since
-            // loading existing queries may cause us to create new DepNodes, which
-            // may in turn end up invoking `store_foreign_def_id_hash`
-            tcx.dep_graph.exec_cache_promotions(QueryCtxt::from_tcx(tcx));
-
             let latest_foreign_def_path_hashes = self.latest_foreign_def_path_hashes.lock().clone();
             let hygiene_encode_context = HygieneEncodeContext::default();
 
@@ -564,7 +575,7 @@ impl<'sess> OnDiskCache<'sess> {
         })
     }
 
-    fn with_decoder<'a, 'tcx, T, F: FnOnce(&mut CacheDecoder<'sess, 'tcx>) -> T>(
+    fn with_decoder<'a, 'tcx, T, F: for<'s> FnOnce(&mut CacheDecoder<'s, 'tcx>) -> T>(
         &'sess self,
         tcx: TyCtxt<'tcx>,
         pos: AbsoluteBytePos,
@@ -575,9 +586,10 @@ impl<'sess> OnDiskCache<'sess> {
     {
         let cnum_map = self.cnum_map.get_or_init(|| Self::compute_cnum_map(tcx));
 
+        let serialized_data = self.serialized_data.read();
         let mut decoder = CacheDecoder {
             tcx,
-            opaque: opaque::Decoder::new(&self.serialized_data[..], pos.to_usize()),
+            opaque: opaque::Decoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
             source_map: self.source_map,
             cnum_map,
             file_index_to_file: &self.file_index_to_file,