about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAaron Hill <aa1ronham@gmail.com>2020-07-24 03:01:07 -0400
committerAaron Hill <aa1ronham@gmail.com>2020-07-26 18:37:03 -0400
commitf622f45afdd7688e19d511799e978c1c66889fb4 (patch)
treea1d0110cb7a78c87e81a7db1cb97af349eb0bd00
parentcf7bef302abb0fc7ab8bf40f22c3bc3a6aca68ff (diff)
downloadrust-f622f45afdd7688e19d511799e978c1c66889fb4.tar.gz
rust-f622f45afdd7688e19d511799e978c1c66889fb4.zip
Share serialization optimization between incr and metadata
-rw-r--r--src/librustc_metadata/rmeta/decoder.rs8
-rw-r--r--src/librustc_metadata/rmeta/encoder.rs160
-rw-r--r--src/librustc_middle/ty/query/on_disk_cache.rs62
-rw-r--r--src/librustc_span/hygiene.rs144
-rw-r--r--src/librustc_span/lib.rs1
-rw-r--r--src/test/incremental/hygiene/auxiliary/cached_hygiene.rs1
6 files changed, 198 insertions, 178 deletions
diff --git a/src/librustc_metadata/rmeta/decoder.rs b/src/librustc_metadata/rmeta/decoder.rs
index 7d428840cc9..df4bb2502cb 100644
--- a/src/librustc_metadata/rmeta/decoder.rs
+++ b/src/librustc_metadata/rmeta/decoder.rs
@@ -46,7 +46,7 @@ use std::num::NonZeroUsize;
 use std::path::Path;
 
 pub use cstore_impl::{provide, provide_extern};
-use rustc_span::hygiene::HygieneContext;
+use rustc_span::hygiene::HygieneDecodeContext;
 
 mod cstore_impl;
 
@@ -111,10 +111,10 @@ crate struct CrateMetadata {
 
     /// Additional data used for decoding `HygieneData` (e.g. `SyntaxContext`
     /// and `ExpnId`).
-    /// Note that we store a `HygieneContext` for each `CrateMetadat`. This is
+    /// Note that we store a `HygieneDecodeContext` for each `CrateMetadata`. This is
     /// because `SyntaxContext` ids are not globally unique, so we need
     /// to track which ids we've decoded on a per-crate basis.
-    hygiene_context: HygieneContext,
+    hygiene_context: HygieneDecodeContext,
 
     // --- Data used only for improving diagnostics ---
     /// Information about the `extern crate` item or path that caused this crate to be loaded.
@@ -1671,7 +1671,7 @@ impl CrateMetadata {
             private_dep,
             host_hash,
             extern_crate: Lock::new(None),
-            hygiene_context: HygieneContext::new(),
+            hygiene_context: Default::default(),
         }
     }
 
diff --git a/src/librustc_metadata/rmeta/encoder.rs b/src/librustc_metadata/rmeta/encoder.rs
index 076ed679834..dc8d14a44f8 100644
--- a/src/librustc_metadata/rmeta/encoder.rs
+++ b/src/librustc_metadata/rmeta/encoder.rs
@@ -30,7 +30,7 @@ use rustc_middle::ty::codec::{self as ty_codec, TyEncoder};
 use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
 use rustc_serialize::{opaque, Encodable, Encoder, SpecializedEncoder, UseSpecializedEncodable};
 use rustc_session::config::CrateType;
-use rustc_span::hygiene::ExpnDataEncodeMode;
+use rustc_span::hygiene::{ExpnDataEncodeMode, HygieneEncodeContext};
 use rustc_span::source_map::Spanned;
 use rustc_span::symbol::{sym, Ident, Symbol};
 use rustc_span::{self, ExternalSource, FileName, SourceFile, Span, SyntaxContext};
@@ -39,7 +39,7 @@ use std::hash::Hash;
 use std::num::NonZeroUsize;
 use std::path::Path;
 
-struct EncodeContext<'tcx> {
+struct EncodeContext<'a, 'tcx> {
     opaque: opaque::Encoder,
     tcx: TyCtxt<'tcx>,
 
@@ -67,15 +67,7 @@ struct EncodeContext<'tcx> {
     // with a result containing a foreign `Span`.
     required_source_files: Option<GrowableBitSet<usize>>,
     is_proc_macro: bool,
-    /// All `SyntaxContexts` for which we have writen `SyntaxContextData` into crate metadata.
-    /// This is `None` after we finish encoding `SyntaxContexts`, to ensure
-    /// that we don't accidentally try to encode any more `SyntaxContexts`
-    serialized_ctxts: Option<FxHashSet<SyntaxContext>>,
-    /// The `SyntaxContexts` that we have serialized (e.g. as a result of encoding `Spans`)
-    /// in the most recent 'round' of serializnig. Serializing `SyntaxContextData`
-    /// may cause us to serialize more `SyntaxContext`s, so serialize in a loop
-    /// until we reach a fixed point.
-    latest_ctxts: Option<FxHashSet<SyntaxContext>>,
+    hygiene_ctxt: &'a HygieneEncodeContext,
 }
 
 macro_rules! encoder_methods {
@@ -86,7 +78,7 @@ macro_rules! encoder_methods {
     }
 }
 
-impl<'tcx> Encoder for EncodeContext<'tcx> {
+impl<'a, 'tcx> Encoder for EncodeContext<'a, 'tcx> {
     type Error = <opaque::Encoder as Encoder>::Error;
 
     #[inline]
@@ -117,13 +109,13 @@ impl<'tcx> Encoder for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx, T> SpecializedEncoder<Lazy<T, ()>> for EncodeContext<'tcx> {
+impl<'a, 'tcx, T> SpecializedEncoder<Lazy<T, ()>> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, lazy: &Lazy<T>) -> Result<(), Self::Error> {
         self.emit_lazy_distance(*lazy)
     }
 }
 
-impl<'tcx, T> SpecializedEncoder<Lazy<[T], usize>> for EncodeContext<'tcx> {
+impl<'a, 'tcx, T> SpecializedEncoder<Lazy<[T], usize>> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, lazy: &Lazy<[T]>) -> Result<(), Self::Error> {
         self.emit_usize(lazy.meta)?;
         if lazy.meta == 0 {
@@ -133,7 +125,7 @@ impl<'tcx, T> SpecializedEncoder<Lazy<[T], usize>> for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx, I: Idx, T> SpecializedEncoder<Lazy<Table<I, T>, usize>> for EncodeContext<'tcx>
+impl<'a, 'tcx, I: Idx, T> SpecializedEncoder<Lazy<Table<I, T>, usize>> for EncodeContext<'a, 'tcx>
 where
     Option<T>: FixedSizeEncoding,
 {
@@ -143,14 +135,14 @@ where
     }
 }
 
-impl<'tcx> SpecializedEncoder<CrateNum> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<CrateNum> for EncodeContext<'a, 'tcx> {
     #[inline]
     fn specialized_encode(&mut self, cnum: &CrateNum) -> Result<(), Self::Error> {
         self.emit_u32(cnum.as_u32())
     }
 }
 
-impl<'tcx> SpecializedEncoder<DefId> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<DefId> for EncodeContext<'a, 'tcx> {
     #[inline]
     fn specialized_encode(&mut self, def_id: &DefId) -> Result<(), Self::Error> {
         let DefId { krate, index } = *def_id;
@@ -160,29 +152,31 @@ impl<'tcx> SpecializedEncoder<DefId> for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx> SpecializedEncoder<SyntaxContext> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<SyntaxContext> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, ctxt: &SyntaxContext) -> Result<(), Self::Error> {
-        if !self.serialized_ctxts.as_ref().unwrap().contains(ctxt) {
-            self.latest_ctxts.as_mut().unwrap().insert(*ctxt);
-        }
-        rustc_span::hygiene::raw_encode_syntax_context(*ctxt, self)
+        rustc_span::hygiene::raw_encode_syntax_context(*ctxt, &self.hygiene_ctxt, self)
     }
 }
 
-impl<'tcx> SpecializedEncoder<ExpnId> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<ExpnId> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, expn: &ExpnId) -> Result<(), Self::Error> {
-        rustc_span::hygiene::raw_encode_expn_id(*expn, ExpnDataEncodeMode::Metadata, self)
+        rustc_span::hygiene::raw_encode_expn_id(
+            *expn,
+            &mut self.hygiene_ctxt,
+            ExpnDataEncodeMode::Metadata,
+            self,
+        )
     }
 }
 
-impl<'tcx> SpecializedEncoder<DefIndex> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<DefIndex> for EncodeContext<'a, 'tcx> {
     #[inline]
     fn specialized_encode(&mut self, def_index: &DefIndex) -> Result<(), Self::Error> {
         self.emit_u32(def_index.as_u32())
     }
 }
 
-impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<Span> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, span: &Span) -> Result<(), Self::Error> {
         if span.is_dummy() {
             return TAG_INVALID_SPAN.encode(self);
@@ -303,14 +297,14 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx> SpecializedEncoder<LocalDefId> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<LocalDefId> for EncodeContext<'a, 'tcx> {
     #[inline]
     fn specialized_encode(&mut self, def_id: &LocalDefId) -> Result<(), Self::Error> {
         self.specialized_encode(&def_id.to_def_id())
     }
 }
 
-impl<'a, 'b, 'tcx> SpecializedEncoder<&'a ty::TyS<'b>> for EncodeContext<'tcx>
+impl<'a, 'b, 'c, 'tcx> SpecializedEncoder<&'a ty::TyS<'b>> for EncodeContext<'c, 'tcx>
 where
     &'a ty::TyS<'b>: UseSpecializedEncodable,
 {
@@ -321,7 +315,7 @@ where
     }
 }
 
-impl<'b, 'tcx> SpecializedEncoder<ty::Predicate<'b>> for EncodeContext<'tcx> {
+impl<'a, 'b, 'tcx> SpecializedEncoder<ty::Predicate<'b>> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, predicate: &ty::Predicate<'b>) -> Result<(), Self::Error> {
         debug_assert!(self.tcx.lift(predicate).is_some());
         let predicate =
@@ -332,7 +326,7 @@ impl<'b, 'tcx> SpecializedEncoder<ty::Predicate<'b>> for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx> SpecializedEncoder<interpret::AllocId> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<interpret::AllocId> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, alloc_id: &interpret::AllocId) -> Result<(), Self::Error> {
         use std::collections::hash_map::Entry;
         let index = match self.interpret_allocs.entry(*alloc_id) {
@@ -349,13 +343,13 @@ impl<'tcx> SpecializedEncoder<interpret::AllocId> for EncodeContext<'tcx> {
     }
 }
 
-impl<'tcx> SpecializedEncoder<Fingerprint> for EncodeContext<'tcx> {
+impl<'a, 'tcx> SpecializedEncoder<Fingerprint> for EncodeContext<'a, 'tcx> {
     fn specialized_encode(&mut self, f: &Fingerprint) -> Result<(), Self::Error> {
         f.encode_opaque(&mut self.opaque)
     }
 }
 
-impl<'tcx, T> SpecializedEncoder<mir::ClearCrossCrate<T>> for EncodeContext<'tcx>
+impl<'a, 'tcx, T> SpecializedEncoder<mir::ClearCrossCrate<T>> for EncodeContext<'a, 'tcx>
 where
     mir::ClearCrossCrate<T>: UseSpecializedEncodable,
 {
@@ -364,7 +358,7 @@ where
     }
 }
 
-impl<'tcx> TyEncoder for EncodeContext<'tcx> {
+impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> {
     fn position(&self) -> usize {
         self.opaque.position()
     }
@@ -372,17 +366,17 @@ impl<'tcx> TyEncoder for EncodeContext<'tcx> {
 
 /// Helper trait to allow overloading `EncodeContext::lazy` for iterators.
 trait EncodeContentsForLazy<T: ?Sized + LazyMeta> {
-    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'tcx>) -> T::Meta;
+    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'a, 'tcx>) -> T::Meta;
 }
 
 impl<T: Encodable> EncodeContentsForLazy<T> for &T {
-    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'tcx>) {
+    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'a, 'tcx>) {
         self.encode(ecx).unwrap()
     }
 }
 
 impl<T: Encodable> EncodeContentsForLazy<T> for T {
-    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'tcx>) {
+    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'a, 'tcx>) {
         self.encode(ecx).unwrap()
     }
 }
@@ -392,7 +386,7 @@ where
     I: IntoIterator,
     I::Item: EncodeContentsForLazy<T>,
 {
-    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'tcx>) -> usize {
+    fn encode_contents_for_lazy(self, ecx: &mut EncodeContext<'a, 'tcx>) -> usize {
         self.into_iter().map(|value| value.encode_contents_for_lazy(ecx)).count()
     }
 }
@@ -409,7 +403,7 @@ macro_rules! record {
     }};
 }
 
-impl<'tcx> EncodeContext<'tcx> {
+impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
     fn emit_lazy_distance<T: ?Sized + LazyMeta>(
         &mut self,
         lazy: Lazy<T>,
@@ -628,7 +622,7 @@ impl<'tcx> EncodeContext<'tcx> {
         // Therefore, we need to encode the hygiene data last to ensure that we encode
         // any `SyntaxContext`s that might be used.
         i = self.position();
-        let (syntax_contexts, syntax_bytes, expn_data, expn_bytes) = self.encode_hygiene();
+        let (syntax_contexts, expn_data) = self.encode_hygiene();
         let hygiene_bytes = self.position() - i;
 
         // Encode source_map. This needs to be done last,
@@ -715,8 +709,6 @@ impl<'tcx> EncodeContext<'tcx> {
             println!("            item bytes: {}", item_bytes);
             println!("           table bytes: {}", tables_bytes);
             println!("         hygiene bytes: {}", hygiene_bytes);
-            println!("   SyntaxContext bytes: {}", syntax_bytes);
-            println!("          ExpnId bytes: {}", expn_bytes);
             println!("            zero bytes: {}", zero_bytes);
             println!("           total bytes: {}", total_bytes);
         }
@@ -725,7 +717,7 @@ impl<'tcx> EncodeContext<'tcx> {
     }
 }
 
-impl EncodeContext<'tcx> {
+impl EncodeContext<'a, 'tcx> {
     fn encode_variances_of(&mut self, def_id: DefId) {
         debug!("EncodeContext::encode_variances_of({:?})", def_id);
         record!(self.tables.variances[def_id] <- &self.tcx.variances_of(def_id)[..]);
@@ -1499,75 +1491,23 @@ impl EncodeContext<'tcx> {
         self.lazy(foreign_modules.iter().cloned())
     }
 
-    fn encode_hygiene(&mut self) -> (SyntaxContextTable, usize, ExpnDataTable, usize) {
+    fn encode_hygiene(&mut self) -> (SyntaxContextTable, ExpnDataTable) {
         let mut syntax_contexts: TableBuilder<_, _> = Default::default();
         let mut expn_data_table: TableBuilder<_, _> = Default::default();
 
-        let mut i = self.position();
-        // We need to encode the `ExpnData` *before* we encode
-        // the `SyntaxContextData`, since encoding `ExpnData` may cause
-        // us to use more `SyntaxContexts` when we encode the spans stored
-        // inside `ExpnData`
-        rustc_span::hygiene::for_all_expn_data(|index, expn_data| {
-            // Don't encode the ExpnData for ExpnIds from foreign crates.
-            // The crate that defines the ExpnId will store the ExpnData,
-            // and the metadata decoder will look it from from that crate via the CStore
-            if expn_data.krate == LOCAL_CRATE {
-                expn_data_table.set(index, self.lazy(expn_data));
-            }
-            Ok::<(), !>(())
-        })
-        .unwrap();
-
-        let expn_bytes = self.position() - i;
-
-        i = self.position();
-        let mut num_serialized = 0;
-
-        // When we serialize a `SyntaxContextData`, we may end up serializing
-        // a `SyntaxContext` that we haven't seen before. Therefore,
-        while !self.latest_ctxts.as_ref().unwrap().is_empty() {
-            debug!(
-                "encode_hygiene: Serializing a round of {:?} SyntaxContextDatas: {:?}",
-                self.latest_ctxts.as_ref().unwrap().len(),
-                self.latest_ctxts.as_ref().unwrap()
-            );
-
-            // Consume the current round of SyntaxContexts.
-            let latest = self.latest_ctxts.replace(FxHashSet::default()).unwrap();
-
-            // It's fine to iterate over a HashMap, because thw serialization
-            // of the table that we insert data into doesn't depend on insertion
-            // order
-            rustc_span::hygiene::for_all_data_in(latest.into_iter(), |(index, ctxt, data)| {
-                if self.serialized_ctxts.as_mut().unwrap().insert(ctxt) {
-                    syntax_contexts.set(index, self.lazy(data));
-                    num_serialized += 1;
-                }
-                Ok::<_, !>(())
-            })
-            .unwrap();
-        }
-        debug!("encode_hygiene: Done serializing SyntaxContextData");
-        let syntax_bytes = self.position() - i;
-
-        let total = rustc_span::hygiene::num_syntax_ctxts();
-        debug!(
-            "encode_hygiene: stored {}/{} ({})",
-            num_serialized,
-            total,
-            (num_serialized as f32) / (total as f32)
+        let _: Result<(), !> = self.hygiene_ctxt.encode(
+            &mut (&mut *self, &mut syntax_contexts, &mut expn_data_table),
+            |(this, syntax_contexts, _), index, ctxt_data| {
+                syntax_contexts.set(index, this.lazy(ctxt_data));
+                Ok(())
+            },
+            |(this, _, expn_data_table), index, expn_data| {
+                expn_data_table.set(index, this.lazy(expn_data));
+                Ok(())
+            },
         );
 
-        self.serialized_ctxts.take();
-        self.latest_ctxts.take();
-
-        (
-            syntax_contexts.encode(&mut self.opaque),
-            syntax_bytes,
-            expn_data_table.encode(&mut self.opaque),
-            expn_bytes,
-        )
+        (syntax_contexts.encode(&mut self.opaque), expn_data_table.encode(&mut self.opaque))
     }
 
     fn encode_proc_macros(&mut self) -> Option<Lazy<[DefIndex]>> {
@@ -1759,7 +1699,7 @@ impl EncodeContext<'tcx> {
 }
 
 // FIXME(eddyb) make metadata encoding walk over all definitions, instead of HIR.
-impl Visitor<'tcx> for EncodeContext<'tcx> {
+impl Visitor<'tcx> for EncodeContext<'a, 'tcx> {
     type Map = Map<'tcx>;
 
     fn nested_visit_map(&mut self) -> NestedVisitorMap<Self::Map> {
@@ -1797,7 +1737,7 @@ impl Visitor<'tcx> for EncodeContext<'tcx> {
     }
 }
 
-impl EncodeContext<'tcx> {
+impl EncodeContext<'a, 'tcx> {
     fn encode_fields(&mut self, adt_def: &ty::AdtDef) {
         for (variant_index, variant) in adt_def.variants.iter_enumerated() {
             for (field_index, _field) in variant.fields.iter().enumerate() {
@@ -2051,6 +1991,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
     encoder.emit_raw_bytes(&[0, 0, 0, 0]);
 
     let source_map_files = tcx.sess.source_map().files();
+    let hygiene_ctxt = HygieneEncodeContext::default();
 
     let mut ecx = EncodeContext {
         opaque: encoder,
@@ -2064,8 +2005,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
         interpret_allocs_inverse: Default::default(),
         required_source_files: Some(GrowableBitSet::with_capacity(source_map_files.len())),
         is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
-        serialized_ctxts: Some(Default::default()),
-        latest_ctxts: Some(Default::default()),
+        hygiene_ctxt: &hygiene_ctxt,
     };
     drop(source_map_files);
 
diff --git a/src/librustc_middle/ty/query/on_disk_cache.rs b/src/librustc_middle/ty/query/on_disk_cache.rs
index ed330321bdb..643fbe793ab 100644
--- a/src/librustc_middle/ty/query/on_disk_cache.rs
+++ b/src/librustc_middle/ty/query/on_disk_cache.rs
@@ -18,8 +18,8 @@ use rustc_serialize::{
 };
 use rustc_session::{CrateDisambiguator, Session};
 use rustc_span::hygiene::{
-    ExpnDataDecodeMode, ExpnDataEncodeMode, ExpnId, HygieneContext, SyntaxContext,
-    SyntaxContextData,
+    ExpnDataDecodeMode, ExpnDataEncodeMode, ExpnId, HygieneDecodeContext, HygieneEncodeContext,
+    SyntaxContext, SyntaxContextData,
 };
 use rustc_span::source_map::{SourceMap, StableSourceFileId};
 use rustc_span::symbol::Ident;
@@ -83,7 +83,7 @@ pub struct OnDiskCache<'sess> {
     // but it seemed easier to have `OnDiskCache` be independent of the `CStore`.
     expn_data: FxHashMap<u32, AbsoluteBytePos>,
     // Additional information used when decoding hygiene data.
-    hygiene_context: HygieneContext,
+    hygiene_context: HygieneDecodeContext,
 }
 
 // This type is used only for serialization and deserialization.
@@ -158,7 +158,7 @@ impl<'sess> OnDiskCache<'sess> {
             alloc_decoding_state: AllocDecodingState::new(footer.interpret_alloc_index),
             syntax_contexts: footer.syntax_contexts,
             expn_data: footer.expn_data,
-            hygiene_context: HygieneContext::new(),
+            hygiene_context: Default::default(),
         }
     }
 
@@ -176,7 +176,7 @@ impl<'sess> OnDiskCache<'sess> {
             alloc_decoding_state: AllocDecodingState::new(Vec::new()),
             syntax_contexts: FxHashMap::default(),
             expn_data: FxHashMap::default(),
-            hygiene_context: HygieneContext::new(),
+            hygiene_context: Default::default(),
         }
     }
 
@@ -204,6 +204,8 @@ impl<'sess> OnDiskCache<'sess> {
                 (file_to_file_index, file_index_to_stable_id)
             };
 
+            let hygiene_encode_context = HygieneEncodeContext::default();
+
             let mut encoder = CacheEncoder {
                 tcx,
                 encoder,
@@ -213,6 +215,7 @@ impl<'sess> OnDiskCache<'sess> {
                 interpret_allocs_inverse: Vec::new(),
                 source_map: CachingSourceMapView::new(tcx.sess.source_map()),
                 file_to_file_index,
+                hygiene_context: &hygiene_encode_context,
             };
 
             // Load everything into memory so we can write it out to the on-disk
@@ -293,29 +296,26 @@ impl<'sess> OnDiskCache<'sess> {
                 .collect();
 
             let mut syntax_contexts = FxHashMap::default();
-            let mut expn_data = FxHashMap::default();
+            let mut expn_ids = FxHashMap::default();
 
             // Encode all hygiene data (`SyntaxContextData` and `ExpnData`) from the current
             // session.
-            // FIXME: Investigate tracking which `SyntaxContext`s and `ExpnId`s we actually
-            // need, to avoid serializing data that will never be used. This will require
-            // tracking which `SyntaxContext`s/`ExpnId`s are actually (transitively) referenced
-            // from any of the `Span`s that we serialize.
-
-            rustc_span::hygiene::for_all_data(|(index, _ctxt, data)| {
-                let pos = AbsoluteBytePos::new(encoder.position());
-                encoder.encode_tagged(TAG_SYNTAX_CONTEXT, data)?;
-                syntax_contexts.insert(index, pos);
-                Ok(())
-            })?;
 
-            rustc_span::hygiene::for_all_expn_data(|index, data| {
-                let pos = AbsoluteBytePos::new(encoder.position());
-                encoder.encode_tagged(TAG_EXPN_DATA, data)?;
-                //let hash = tcx.def_path_hash(data.def_id.unwrap());
-                expn_data.insert(index, pos);
-                Ok(())
-            })?;
+            hygiene_encode_context.encode(
+                &mut encoder,
+                |encoder, index, ctxt_data| {
+                    let pos = AbsoluteBytePos::new(encoder.position());
+                    encoder.encode_tagged(TAG_SYNTAX_CONTEXT, ctxt_data)?;
+                    syntax_contexts.insert(index, pos);
+                    Ok(())
+                },
+                |encoder, index, expn_data| {
+                    let pos = AbsoluteBytePos::new(encoder.position());
+                    encoder.encode_tagged(TAG_EXPN_DATA, expn_data)?;
+                    expn_ids.insert(index, pos);
+                    Ok(())
+                },
+            )?;
 
             // Encode the file footer.
             let footer_pos = encoder.position() as u64;
@@ -328,7 +328,7 @@ impl<'sess> OnDiskCache<'sess> {
                     diagnostics_index,
                     interpret_alloc_index,
                     syntax_contexts,
-                    expn_data,
+                    expn_data: expn_ids,
                 },
             )?;
 
@@ -503,7 +503,7 @@ struct CacheDecoder<'a, 'tcx> {
     alloc_decoding_session: AllocDecodingSession<'a>,
     syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
     expn_data: &'a FxHashMap<u32, AbsoluteBytePos>,
-    hygiene_context: &'a HygieneContext,
+    hygiene_context: &'a HygieneDecodeContext,
 }
 
 impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
@@ -771,6 +771,7 @@ struct CacheEncoder<'a, 'tcx, E: ty_codec::TyEncoder> {
     interpret_allocs_inverse: Vec<interpret::AllocId>,
     source_map: CachingSourceMapView<'tcx>,
     file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
+    hygiene_context: &'a HygieneEncodeContext,
 }
 
 impl<'a, 'tcx, E> CacheEncoder<'a, 'tcx, E>
@@ -826,7 +827,7 @@ where
     E: 'a + TyEncoder,
 {
     fn specialized_encode(&mut self, ctxt: &SyntaxContext) -> Result<(), Self::Error> {
-        rustc_span::hygiene::raw_encode_syntax_context(*ctxt, self)
+        rustc_span::hygiene::raw_encode_syntax_context(*ctxt, self.hygiene_context, self)
     }
 }
 
@@ -835,7 +836,12 @@ where
     E: 'a + TyEncoder,
 {
     fn specialized_encode(&mut self, expn: &ExpnId) -> Result<(), Self::Error> {
-        rustc_span::hygiene::raw_encode_expn_id(*expn, ExpnDataEncodeMode::IncrComp, self)
+        rustc_span::hygiene::raw_encode_expn_id(
+            *expn,
+            self.hygiene_context,
+            ExpnDataEncodeMode::IncrComp,
+            self,
+        )
     }
 }
 
diff --git a/src/librustc_span/hygiene.rs b/src/librustc_span/hygiene.rs
index 44da5d77b7e..13bc1751831 100644
--- a/src/librustc_span/hygiene.rs
+++ b/src/librustc_span/hygiene.rs
@@ -31,7 +31,7 @@ use crate::{Span, DUMMY_SP};
 
 use crate::def_id::{CrateNum, DefId, CRATE_DEF_INDEX, LOCAL_CRATE};
 use log::*;
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_data_structures::sync::{Lock, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{
@@ -889,8 +889,75 @@ impl DesugaringKind {
 impl UseSpecializedEncodable for ExpnId {}
 impl UseSpecializedDecodable for ExpnId {}
 
+#[derive(Default)]
+pub struct HygieneEncodeContext {
+    /// All `SyntaxContexts` for which we have written `SyntaxContextData` into crate metadata.
+    /// This is `None` after we finish encoding `SyntaxContexts`, to ensure
+    /// that we don't accidentally try to encode any more `SyntaxContexts`
+    serialized_ctxts: Lock<FxHashSet<SyntaxContext>>,
+    /// The `SyntaxContexts` that we have serialized (e.g. as a result of encoding `Spans`)
+    /// in the most recent 'round' of serializing. Serializing `SyntaxContextData`
+    /// may cause us to serialize more `SyntaxContext`s, so serialize in a loop
+    /// until we reach a fixed point.
+    latest_ctxts: Lock<FxHashSet<SyntaxContext>>,
+
+    serialized_expns: Lock<FxHashSet<ExpnId>>,
+
+    latest_expns: Lock<FxHashSet<ExpnId>>,
+}
+
+impl HygieneEncodeContext {
+    pub fn encode<
+        T,
+        R,
+        F: FnMut(&mut T, u32, &SyntaxContextData) -> Result<(), R>,
+        G: FnMut(&mut T, u32, &ExpnData) -> Result<(), R>,
+    >(
+        &self,
+        encoder: &mut T,
+        mut encode_ctxt: F,
+        mut encode_expn: G,
+    ) -> Result<(), R> {
+        // When we serialize a `SyntaxContextData`, we may end up serializing
+        // a `SyntaxContext` that we haven't seen before
+        while !self.latest_ctxts.lock().is_empty() || !self.latest_expns.lock().is_empty() {
+            debug!(
+                "encode_hygiene: Serializing a round of {:?} SyntaxContextDatas: {:?}",
+                self.latest_ctxts.lock().len(),
+                self.latest_ctxts
+            );
+
+            // Consume the current round of SyntaxContexts.
+            // Drop the lock() temporary early
+            let latest_ctxts = { std::mem::take(&mut *self.latest_ctxts.lock()) };
+
+            // It's fine to iterate over a HashMap, because the serialization
+            // of the table that we insert data into doesn't depend on insertion
+            // order
+            for_all_ctxts_in(latest_ctxts.into_iter(), |(index, ctxt, data)| {
+                if self.serialized_ctxts.lock().insert(ctxt) {
+                    encode_ctxt(encoder, index, data)?;
+                }
+                Ok(())
+            })?;
+
+            let latest_expns = { std::mem::take(&mut *self.latest_expns.lock()) };
+
+            for_all_expns_in(latest_expns.into_iter(), |index, expn, data| {
+                if self.serialized_expns.lock().insert(expn) {
+                    encode_expn(encoder, index, data)?;
+                }
+                Ok(())
+            })?;
+        }
+        debug!("encode_hygiene: Done serializing SyntaxContextData");
+        Ok(())
+    }
+}
+
+#[derive(Default)]
 /// Additional information used to assist in decoding hygiene data
-pub struct HygieneContext {
+pub struct HygieneDecodeContext {
     // Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
     // global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
     // a new id in the global `HygieneData`. This map tracks the ID we end up picking,
@@ -901,20 +968,11 @@ pub struct HygieneContext {
     remapped_expns: Lock<Vec<Option<ExpnId>>>,
 }
 
-impl HygieneContext {
-    pub fn new() -> HygieneContext {
-        HygieneContext {
-            remapped_ctxts: Lock::new(Vec::new()),
-            remapped_expns: Lock::new(Vec::new()),
-        }
-    }
-}
-
 pub fn decode_expn_id<
     'a,
     D: Decoder,
     F: FnOnce(&mut D, u32) -> Result<ExpnData, D::Error>,
-    G: FnOnce(CrateNum) -> &'a HygieneContext,
+    G: FnOnce(CrateNum) -> &'a HygieneDecodeContext,
 >(
     d: &mut D,
     mode: ExpnDataDecodeMode<'a, G>,
@@ -963,21 +1021,19 @@ pub fn decode_expn_id<
 
         hygiene_data.expn_data.push(Some(expn_data));
 
-        // Drop lock() temporary early
-        {
-            let mut expns = outer_expns.lock();
-            let new_len = index as usize + 1;
-            if expns.len() < new_len {
-                expns.resize(new_len, None);
-            }
-            expns[index as usize] = Some(expn_id);
+        let mut expns = outer_expns.lock();
+        let new_len = index as usize + 1;
+        if expns.len() < new_len {
+            expns.resize(new_len, None);
         }
+        expns[index as usize] = Some(expn_id);
+        drop(expns);
         expn_id
     });
     return Ok(expn_id);
 }
 
-// Decodes `SyntaxContext`, using the provided `HygieneContext`
+// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext`
 // to track which `SyntaxContext`s we have already decoded.
 // The provided closure will be invoked to deserialize a `SyntaxContextData`
 // if we haven't already seen the id of the `SyntaxContext` we are deserializing.
@@ -986,7 +1042,7 @@ pub fn decode_syntax_context<
     F: FnOnce(&mut D, u32) -> Result<SyntaxContextData, D::Error>,
 >(
     d: &mut D,
-    context: &HygieneContext,
+    context: &HygieneDecodeContext,
     decode_data: F,
 ) -> Result<SyntaxContext, D::Error> {
     let raw_id: u32 = Decodable::decode(d)?;
@@ -1019,15 +1075,13 @@ pub fn decode_syntax_context<
             opaque_and_semitransparent: SyntaxContext::root(),
             dollar_crate_name: kw::Invalid,
         });
-        // Ensure that the lock() temporary is dropped early
-        {
-            let mut ctxts = outer_ctxts.lock();
-            let new_len = raw_id as usize + 1;
-            if ctxts.len() < new_len {
-                ctxts.resize(new_len, None);
-            }
-            ctxts[raw_id as usize] = Some(new_ctxt);
+        let mut ctxts = outer_ctxts.lock();
+        let new_len = raw_id as usize + 1;
+        if ctxts.len() < new_len {
+            ctxts.resize(new_len, None);
         }
+        ctxts[raw_id as usize] = Some(new_ctxt);
+        drop(ctxts);
         new_ctxt
     });
 
@@ -1056,7 +1110,7 @@ pub fn num_syntax_ctxts() -> usize {
     HygieneData::with(|data| data.syntax_context_data.len())
 }
 
-pub fn for_all_data_in<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) -> Result<(), E>>(
+pub fn for_all_ctxts_in<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) -> Result<(), E>>(
     ctxts: impl Iterator<Item = SyntaxContext>,
     mut f: F,
 ) -> Result<(), E> {
@@ -1069,6 +1123,18 @@ pub fn for_all_data_in<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) ->
     Ok(())
 }
 
+pub fn for_all_expns_in<E, F: FnMut(u32, ExpnId, &ExpnData) -> Result<(), E>>(
+    expns: impl Iterator<Item = ExpnId>,
+    mut f: F,
+) -> Result<(), E> {
+    let all_data: Vec<_> = HygieneData::with(|data| {
+        expns.map(|expn| (expn, data.expn_data[expn.0 as usize].clone())).collect()
+    });
+    for (expn, data) in all_data.into_iter() {
+        f(expn.0, expn, &data.unwrap_or_else(|| panic!("Missing data for {:?}", expn)))?;
+    }
+    Ok(())
+}
 pub fn for_all_data<E, F: FnMut((u32, SyntaxContext, &SyntaxContextData)) -> Result<(), E>>(
     mut f: F,
 ) -> Result<(), E> {
@@ -1089,16 +1155,24 @@ pub fn for_all_expn_data<E, F: FnMut(u32, &ExpnData) -> Result<(), E>>(mut f: F)
 
 pub fn raw_encode_syntax_context<E: Encoder>(
     ctxt: SyntaxContext,
+    context: &HygieneEncodeContext,
     e: &mut E,
 ) -> Result<(), E::Error> {
+    if !context.serialized_ctxts.lock().contains(&ctxt) {
+        context.latest_ctxts.lock().insert(ctxt);
+    }
     ctxt.0.encode(e)
 }
 
 pub fn raw_encode_expn_id<E: Encoder>(
     expn: ExpnId,
+    context: &HygieneEncodeContext,
     mode: ExpnDataEncodeMode,
     e: &mut E,
 ) -> Result<(), E::Error> {
+    if !context.serialized_expns.lock().contains(&expn) {
+        context.latest_expns.lock().insert(expn);
+    }
     match mode {
         ExpnDataEncodeMode::IncrComp => expn.0.encode(e),
         ExpnDataEncodeMode::Metadata => {
@@ -1114,13 +1188,13 @@ pub enum ExpnDataEncodeMode {
     Metadata,
 }
 
-pub enum ExpnDataDecodeMode<'a, F: FnOnce(CrateNum) -> &'a HygieneContext> {
-    IncrComp(&'a HygieneContext),
+pub enum ExpnDataDecodeMode<'a, F: FnOnce(CrateNum) -> &'a HygieneDecodeContext> {
+    IncrComp(&'a HygieneDecodeContext),
     Metadata(F),
 }
 
-impl<'a> ExpnDataDecodeMode<'a, Box<dyn FnOnce(CrateNum) -> &'a HygieneContext>> {
-    pub fn incr_comp(ctxt: &'a HygieneContext) -> Self {
+impl<'a> ExpnDataDecodeMode<'a, Box<dyn FnOnce(CrateNum) -> &'a HygieneDecodeContext>> {
+    pub fn incr_comp(ctxt: &'a HygieneDecodeContext) -> Self {
         ExpnDataDecodeMode::IncrComp(ctxt)
     }
 }
diff --git a/src/librustc_span/lib.rs b/src/librustc_span/lib.rs
index f49e7f15a5c..7087dc80b1d 100644
--- a/src/librustc_span/lib.rs
+++ b/src/librustc_span/lib.rs
@@ -13,6 +13,7 @@
 #![feature(optin_builtin_traits)]
 #![feature(min_specialization)]
 #![feature(option_expect_none)]
+#![feature(refcell_take)]
 
 // FIXME(#56935): Work around ICEs during cross-compilation.
 #[allow(unused)]
diff --git a/src/test/incremental/hygiene/auxiliary/cached_hygiene.rs b/src/test/incremental/hygiene/auxiliary/cached_hygiene.rs
index 411be644cf4..91a9f63d39b 100644
--- a/src/test/incremental/hygiene/auxiliary/cached_hygiene.rs
+++ b/src/test/incremental/hygiene/auxiliary/cached_hygiene.rs
@@ -34,4 +34,3 @@ macro_rules! print_loc {
 pub fn unchanged_fn() {
     print_loc!();
 }
-