about summary refs log tree commit diff
path: root/compiler/rustc_query_impl/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-08-18 23:53:22 +0000
committerbors <bors@rust-lang.org>2022-08-18 23:53:22 +0000
commit71ecf5d359bf750cc171e124779a46985633439d (patch)
treed0c3e7aeea857d9dfddc1456d714b87c26750c9c /compiler/rustc_query_impl/src
parent0b79f758c9aa6646606662a6d623a0752286cd17 (diff)
parentadba4691f6577b35b2d48ebbe6c8e993eea09978 (diff)
downloadrust-71ecf5d359bf750cc171e124779a46985633439d.tar.gz
rust-71ecf5d359bf750cc171e124779a46985633439d.zip
Auto merge of #98851 - klensy:encode_symbols, r=cjgillot
rustc_metadata: dedupe strings to prevent multiple copies in rmeta/query cache blow file size

r? `@cjgillot`

Encodes strings in rmeta/query cache so duplicated ones will be encoded as offsets to first strings, reducing file size.
Diffstat (limited to 'compiler/rustc_query_impl/src')
-rw-r--r--compiler/rustc_query_impl/src/on_disk_cache.rs58
1 files changed, 57 insertions, 1 deletions
diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs
index 56fd90c9855..6711dd3d5c5 100644
--- a/compiler/rustc_query_impl/src/on_disk_cache.rs
+++ b/compiler/rustc_query_impl/src/on_disk_cache.rs
@@ -22,8 +22,9 @@ use rustc_span::hygiene::{
     ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
 };
 use rustc_span::source_map::{SourceMap, StableSourceFileId};
-use rustc_span::CachingSourceMapView;
 use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
+use rustc_span::{CachingSourceMapView, Symbol};
+use std::collections::hash_map::Entry;
 use std::io;
 use std::mem;
 
@@ -38,6 +39,10 @@ const TAG_RELATIVE_SPAN: u8 = 2;
 const TAG_SYNTAX_CONTEXT: u8 = 0;
 const TAG_EXPN_DATA: u8 = 1;
 
+// Tags for encoding Symbol's
+const SYMBOL_STR: u8 = 0;
+const SYMBOL_OFFSET: u8 = 1;
+
 /// Provides an interface to incremental compilation data cached from the
 /// previous compilation session. This data will eventually include the results
 /// of a few selected queries (like `typeck` and `mir_optimized`) and
@@ -254,6 +259,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
                 source_map: CachingSourceMapView::new(tcx.sess.source_map()),
                 file_to_file_index,
                 hygiene_context: &hygiene_encode_context,
+                symbol_table: Default::default(),
             };
 
             // Encode query results.
@@ -714,6 +720,36 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Span {
     }
 }
 
+// copy&paste impl from rustc_metadata
+impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
+    fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
+        let tag = d.read_u8();
+
+        match tag {
+            SYMBOL_STR => {
+                let s = d.read_str();
+                Symbol::intern(s)
+            }
+            SYMBOL_OFFSET => {
+                // read str offset
+                let pos = d.read_usize();
+                let old_pos = d.opaque.position();
+
+                // move to str ofset and read
+                d.opaque.set_position(pos);
+                let s = d.read_str();
+                let sym = Symbol::intern(s);
+
+                // restore position
+                d.opaque.set_position(old_pos);
+
+                sym
+            }
+            _ => unreachable!(),
+        }
+    }
+}
+
 impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for CrateNum {
     fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
         let stable_id = StableCrateId::decode(d);
@@ -815,6 +851,7 @@ pub struct CacheEncoder<'a, 'tcx> {
     source_map: CachingSourceMapView<'tcx>,
     file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
     hygiene_context: &'a HygieneEncodeContext,
+    symbol_table: FxHashMap<Symbol, usize>,
 }
 
 impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
@@ -899,6 +936,25 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
     }
 }
 
+// copy&paste impl from rustc_metadata
+impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
+    fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
+        match s.symbol_table.entry(*self) {
+            Entry::Vacant(o) => {
+                s.encoder.emit_u8(SYMBOL_STR);
+                let pos = s.encoder.position();
+                o.insert(pos);
+                s.emit_str(self.as_str());
+            }
+            Entry::Occupied(o) => {
+                let x = o.get().clone();
+                s.emit_u8(SYMBOL_OFFSET);
+                s.emit_usize(x);
+            }
+        }
+    }
+}
+
 impl<'a, 'tcx> TyEncoder for CacheEncoder<'a, 'tcx> {
     type I = TyCtxt<'tcx>;
     const CLEAR_CROSS_CRATE: bool = false;