diff options
| author | Vadim Petrochenkov <vadim.petrochenkov@gmail.com> | 2025-04-02 23:56:09 +0300 |
|---|---|---|
| committer | Vadim Petrochenkov <vadim.petrochenkov@gmail.com> | 2025-04-03 00:18:04 +0300 |
| commit | fef3cf0d071e5b771734be4380bdccebfee60990 (patch) | |
| tree | bb1777de1f0b26f82653129a49f55827a89fcf6c /compiler/rustc_span/src | |
| parent | 4f0de4c81d80121ac7b576bc68d8016064f4d261 (diff) | |
| download | rust-fef3cf0d071e5b771734be4380bdccebfee60990.tar.gz rust-fef3cf0d071e5b771734be4380bdccebfee60990.zip | |
hygiene: Avoid recursion in syntax context decoding
Diffstat (limited to 'compiler/rustc_span/src')
| -rw-r--r-- | compiler/rustc_span/src/hygiene.rs | 161 |
1 files changed, 61 insertions, 100 deletions
diff --git a/compiler/rustc_span/src/hygiene.rs b/compiler/rustc_span/src/hygiene.rs index 9959e98e3dd..9be16f8ce0c 100644 --- a/compiler/rustc_span/src/hygiene.rs +++ b/compiler/rustc_span/src/hygiene.rs @@ -24,9 +24,6 @@ // because getting it wrong can lead to nested `HygieneData::with` calls that // trigger runtime aborts. (Fortunately these are obvious and easy to fix.) -use std::cell::RefCell; -use std::collections::hash_map::Entry; -use std::collections::hash_set::Entry as SetEntry; use std::hash::Hash; use std::sync::Arc; use std::{fmt, iter, mem}; @@ -34,7 +31,7 @@ use std::{fmt, iter, mem}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher}; -use rustc_data_structures::sync::{Lock, WorkerLocal}; +use rustc_data_structures::sync::Lock; use rustc_data_structures::unhash::UnhashMap; use rustc_hashes::Hash64; use rustc_index::IndexVec; @@ -61,8 +58,8 @@ impl !PartialOrd for SyntaxContext {} /// The other fields are only for caching. type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency); -#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)] -pub struct SyntaxContextData { +#[derive(Clone, Copy, Debug)] +struct SyntaxContextData { outer_expn: ExpnId, outer_transparency: Transparency, parent: SyntaxContext, @@ -74,6 +71,17 @@ pub struct SyntaxContextData { dollar_crate_name: Symbol, } +/// Same as `SyntaxContextData`, but `opaque(_and_semitransparent)` cannot be recursive +/// and use `None` if they need to refer to self. Used for encoding and decoding metadata. +#[derive(Encodable, Decodable)] +pub struct SyntaxContextDataNonRecursive { + outer_expn: ExpnId, + outer_transparency: Transparency, + parent: SyntaxContext, + opaque: Option<SyntaxContext>, + opaque_and_semitransparent: Option<SyntaxContext>, +} + impl SyntaxContextData { fn new( (parent, outer_expn, outer_transparency): SyntaxContextKey, @@ -114,6 +122,19 @@ impl SyntaxContextData { } } +impl SyntaxContextDataNonRecursive { + fn recursive(&self, ctxt: SyntaxContext) -> SyntaxContextData { + SyntaxContextData { + outer_expn: self.outer_expn, + outer_transparency: self.outer_transparency, + parent: self.parent, + opaque: self.opaque.unwrap_or(ctxt), + opaque_and_semitransparent: self.opaque_and_semitransparent.unwrap_or(ctxt), + dollar_crate_name: kw::DollarCrate, + } + } +} + rustc_index::newtype_index! { /// A unique ID associated with a macro invocation and expansion. #[orderable] @@ -637,6 +658,19 @@ impl HygieneData { SyntaxContextData::new(key, opaque, opaque_and_semitransparent); ctxt } + + fn non_recursive_ctxt(&self, ctxt: SyntaxContext) -> SyntaxContextDataNonRecursive { + debug_assert!(!self.syntax_context_data[ctxt.0 as usize].is_decode_placeholder()); + let data = &self.syntax_context_data[ctxt.0 as usize]; + SyntaxContextDataNonRecursive { + outer_expn: data.outer_expn, + outer_transparency: data.outer_transparency, + parent: data.parent, + opaque: (data.opaque != ctxt).then_some(data.opaque), + opaque_and_semitransparent: (data.opaque_and_semitransparent != ctxt) + .then_some(data.opaque_and_semitransparent), + } + } } pub fn walk_chain(span: Span, to: SyntaxContext) -> Span { @@ -1266,7 +1300,7 @@ impl HygieneEncodeContext { pub fn encode<T>( &self, encoder: &mut T, - mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData), + mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextDataNonRecursive), mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash), ) { // When we serialize a `SyntaxContextData`, we may end up serializing @@ -1315,18 +1349,12 @@ struct HygieneDecodeContextInner { // so that multiple occurrences of the same serialized id are decoded to the same // `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded. remapped_ctxts: Vec<Option<SyntaxContext>>, - - /// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`. - decoding: FxHashMap<u32, SyntaxContext>, } #[derive(Default)] /// Additional information used to assist in decoding hygiene data pub struct HygieneDecodeContext { inner: Lock<HygieneDecodeContextInner>, - - /// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread. - local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>, } /// Register an expansion which has been decoded from the on-disk-cache for the local crate. @@ -1397,7 +1425,10 @@ pub fn decode_expn_id( // to track which `SyntaxContext`s we have already decoded. // The provided closure will be invoked to deserialize a `SyntaxContextData` // if we haven't already seen the id of the `SyntaxContext` we are deserializing. -pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>( +pub fn decode_syntax_context< + D: Decoder, + F: FnOnce(&mut D, u32) -> SyntaxContextDataNonRecursive, +>( d: &mut D, context: &HygieneDecodeContext, decode_data: F, @@ -1409,113 +1440,43 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext return SyntaxContext::root(); } - let pending_ctxt = { - let mut inner = context.inner.lock(); - - // Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between - // raw ids from different crate metadatas. - if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() { - // This has already been decoded. - return ctxt; - } - - match inner.decoding.entry(raw_id) { - Entry::Occupied(ctxt_entry) => { - let pending_ctxt = *ctxt_entry.get(); - match context.local_in_progress.borrow_mut().entry(raw_id) { - // We're decoding this already on the current thread. Return here and let the - // function higher up the stack finish decoding to handle recursive cases. - // Hopefully having a `SyntaxContext` that refers to an incorrect data is ok - // during reminder of the decoding process, it's certainly not ok after the - // top level decoding function returns. - SetEntry::Occupied(..) => return pending_ctxt, - // Some other thread is currently decoding this. - // Race with it (alternatively we could wait here). - // We cannot return this value, unlike in the recursive case above, because it - // may expose a `SyntaxContext` pointing to incorrect data to arbitrary code. - SetEntry::Vacant(entry) => { - entry.insert(); - pending_ctxt - } - } - } - Entry::Vacant(entry) => { - // We are the first thread to start decoding. Mark the current thread as being - // progress. - context.local_in_progress.borrow_mut().insert(raw_id); - - // Allocate and store SyntaxContext id *before* calling the decoder function, - // as the SyntaxContextData may reference itself. - let new_ctxt = HygieneData::with(|hygiene_data| { - // Push a dummy SyntaxContextData to ensure that nobody else can get the - // same ID as us. This will be overwritten after call `decode_data`. - hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder()); - SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1) - }); - entry.insert(new_ctxt); - new_ctxt - } - } - }; + // Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between + // raw ids from different crate metadatas. + if let Some(ctxt) = context.inner.lock().remapped_ctxts.get(raw_id as usize).copied().flatten() + { + // This has already been decoded. + return ctxt; + } // Don't try to decode data while holding the lock, since we need to // be able to recursively decode a SyntaxContext let ctxt_data = decode_data(d, raw_id); - let ctxt_key = ctxt_data.key(); let ctxt = HygieneData::with(|hygiene_data| { - match hygiene_data.syntax_context_map.get(&ctxt_key) { - // Ensure that syntax contexts are unique. - // If syntax contexts with the given key already exists, reuse it instead of - // using `pending_ctxt`. - // `pending_ctxt` will leave an unused hole in the vector of syntax contexts. - // Hopefully its value isn't stored anywhere during decoding and its dummy data - // is never accessed later. The `is_decode_placeholder` asserts on all - // accesses to syntax context data attempt to ensure it. - Some(&ctxt) => ctxt, - // This is a completely new context. - // Overwrite its placeholder data with our decoded data. - None => { - let ctxt_data_ref = - &mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize]; - let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data); - // Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`. - // We don't care what the encoding crate set this to - we want to resolve it - // from the perspective of the current compilation session. - ctxt_data_ref.dollar_crate_name = kw::DollarCrate; - // Make sure nothing weird happened while `decode_data` was running. - if !prev_ctxt_data.is_decode_placeholder() { - // Another thread may have already inserted the decoded data, - // but the decoded data should match. - assert_eq!(prev_ctxt_data, *ctxt_data_ref); - } - hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt); - pending_ctxt - } - } + let ctxt_key = (ctxt_data.parent, ctxt_data.outer_expn, ctxt_data.outer_transparency); + *hygiene_data.syntax_context_map.entry(ctxt_key).or_insert_with(|| { + let ctxt = SyntaxContext::from_usize(hygiene_data.syntax_context_data.len()); + hygiene_data.syntax_context_data.push(ctxt_data.recursive(ctxt)); + ctxt + }) }); - // Mark the context as completed - context.local_in_progress.borrow_mut().remove(&raw_id); - let mut inner = context.inner.lock(); let new_len = raw_id as usize + 1; if inner.remapped_ctxts.len() < new_len { inner.remapped_ctxts.resize(new_len, None); } inner.remapped_ctxts[raw_id as usize] = Some(ctxt); - inner.decoding.remove(&raw_id); ctxt } -fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>( +fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextDataNonRecursive)>( ctxts: impl Iterator<Item = SyntaxContext>, mut f: F, ) { - let all_data: Vec<_> = HygieneData::with(|data| { - ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect() - }); + let all_data: Vec<_> = + HygieneData::with(|data| ctxts.map(|ctxt| (ctxt, data.non_recursive_ctxt(ctxt))).collect()); for (ctxt, data) in all_data.into_iter() { f(ctxt.0, ctxt, &data); } |
