about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2023-07-19 16:26:29 +0000
committer bors <bors@rust-lang.org> 2023-07-19 16:26:29 +0000
commit fdfcdad4fa9ae80c20ffd61e54373eb16cd0c044 (patch)
tree 79bfbeab825db731ddcea67f6226037d436ff37c
parent 77e24f90f599070af2d8051ef9adad7fe528dd78 (diff)
parent 186be726d65d053626cbe7ce3b943dac7554268a (diff)
download rust-fdfcdad4fa9ae80c20ffd61e54373eb16cd0c044.tar.gz
rust-fdfcdad4fa9ae80c20ffd61e54373eb16cd0c044.zip
Auto merge of #113850 - cjgillot:span-shorthand, r=compiler-errors
Encode shorthands for spans in metadata.

Spans typically occupy a large proportion of metadata.
This PR deduplicates encoded spans in order to reduce encoded length.
-rw-r--r-- compiler/rustc_metadata/src/rmeta/decoder.rs 21
-rw-r--r-- compiler/rustc_metadata/src/rmeta/encoder.rs 34
-rw-r--r-- compiler/rustc_metadata/src/rmeta/mod.rs 6
3 files changed, 48 insertions, 13 deletions
diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs
index 8fa1d365728..9e67bb655d4 100644
--- a/compiler/rustc_metadata/src/rmeta/decoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/decoder.rs
@@ -34,7 +34,7 @@ use rustc_session::cstore::{
 use rustc_session::Session;
 use rustc_span::hygiene::ExpnIndex;
 use rustc_span::symbol::{kw, Ident, Symbol};
-use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP};
+use rustc_span::{self, BytePos, ExpnId, Pos, Span, SpanData, SyntaxContext, DUMMY_SP};
 
 use proc_macro::bridge::client::ProcMacro;
 use std::iter::TrustedLen;
@@ -513,11 +513,26 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
 
 impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
     fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
+        let mode = SpanEncodingMode::decode(decoder);
+        let data = match mode {
+            SpanEncodingMode::Direct => SpanData::decode(decoder),
+            SpanEncodingMode::Shorthand(position) => decoder.with_position(position, |decoder| {
+                let mode = SpanEncodingMode::decode(decoder);
+                debug_assert!(matches!(mode, SpanEncodingMode::Direct));
+                SpanData::decode(decoder)
+            }),
+        };
+        Span::new(data.lo, data.hi, data.ctxt, data.parent)
+    }
+}
+
+impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
+    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
         let ctxt = SyntaxContext::decode(decoder);
         let tag = u8::decode(decoder);
 
         if tag == TAG_PARTIAL_SPAN {
-            return DUMMY_SP.with_ctxt(ctxt);
+            return DUMMY_SP.with_ctxt(ctxt).data();
         }
 
         debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
@@ -612,7 +627,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
         let hi = hi + source_file.translated_source_file.start_pos;
 
         // Do not try to decode parent for foreign spans.
-        Span::new(lo, hi, ctxt, None)
+        SpanData { lo, hi, ctxt, parent: None }
     }
 }
 
diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index 2785732727f..ac86110f2bd 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -37,7 +37,7 @@ use rustc_session::config::{CrateType, OptLevel};
 use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
 use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
 use rustc_span::symbol::{sym, Symbol};
-use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SyntaxContext};
+use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SpanData, SyntaxContext};
 use std::borrow::Borrow;
 use std::collections::hash_map::Entry;
 use std::hash::Hash;
@@ -53,6 +53,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
     tables: TableBuilders,
 
     lazy_state: LazyState,
+    span_shorthands: FxHashMap<Span, usize>,
     type_shorthands: FxHashMap<Ty<'tcx>, usize>,
     predicate_shorthands: FxHashMap<ty::PredicateKind<'tcx>, usize>,
 
@@ -177,8 +178,20 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {
 
 impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
     fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
-        let span = self.data();
+        match s.span_shorthands.entry(*self) {
+            Entry::Occupied(o) => SpanEncodingMode::Shorthand(*o.get()).encode(s),
+            Entry::Vacant(v) => {
+                let position = s.opaque.position();
+                v.insert(position);
+                SpanEncodingMode::Direct.encode(s);
+                self.data().encode(s);
+            }
+        }
+    }
+}
 
+impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SpanData {
+    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
         // Don't serialize any `SyntaxContext`s from a proc-macro crate,
         // since we don't load proc-macro dependencies during serialization.
         // This means that any hygiene information from macros used *within*
@@ -213,7 +226,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
         if s.is_proc_macro {
             SyntaxContext::root().encode(s);
         } else {
-            span.ctxt.encode(s);
+            self.ctxt.encode(s);
         }
 
         if self.is_dummy() {
@@ -221,18 +234,18 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
         }
 
         // The Span infrastructure should make sure that this invariant holds:
-        debug_assert!(span.lo <= span.hi);
+        debug_assert!(self.lo <= self.hi);
 
-        if !s.source_file_cache.0.contains(span.lo) {
+        if !s.source_file_cache.0.contains(self.lo) {
             let source_map = s.tcx.sess.source_map();
-            let source_file_index = source_map.lookup_source_file_idx(span.lo);
+            let source_file_index = source_map.lookup_source_file_idx(self.lo);
             s.source_file_cache =
                 (source_map.files()[source_file_index].clone(), source_file_index);
         }
         let (ref source_file, source_file_index) = s.source_file_cache;
-        debug_assert!(source_file.contains(span.lo));
+        debug_assert!(source_file.contains(self.lo));
 
-        if !source_file.contains(span.hi) {
+        if !source_file.contains(self.hi) {
             // Unfortunately, macro expansion still sometimes generates Spans
             // that malformed in this way.
             return TAG_PARTIAL_SPAN.encode(s);
@@ -286,11 +299,11 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
 
         // Encode the start position relative to the file start, so we profit more from the
         // variable-length integer encoding.
-        let lo = span.lo - source_file.start_pos;
+        let lo = self.lo - source_file.start_pos;
 
         // Encode length which is usually less than span.hi and profits more
         // from the variable-length integer encoding that we use.
-        let len = span.hi - span.lo;
+        let len = self.hi - self.lo;
 
         tag.encode(s);
         lo.encode(s);
@@ -2182,6 +2195,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
         feat: tcx.features(),
         tables: Default::default(),
         lazy_state: LazyState::NoNode,
+        span_shorthands: Default::default(),
         type_shorthands: Default::default(),
         predicate_shorthands: Default::default(),
         source_file_cache,
diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs
index f6087fbe8f6..0bc16fc64ff 100644
--- a/compiler/rustc_metadata/src/rmeta/mod.rs
+++ b/compiler/rustc_metadata/src/rmeta/mod.rs
@@ -66,6 +66,12 @@ const METADATA_VERSION: u8 = 8;
 /// unsigned integer, and further followed by the rustc version string.
 pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];
 
+#[derive(Encodable, Decodable)]
+enum SpanEncodingMode {
+    Shorthand(usize),
+    Direct,
+}
+
 /// A value of type T referred to by its absolute position
 /// in the metadata, and which can be decoded lazily.
 ///