diff options
| author | bors <bors@rust-lang.org> | 2025-09-19 12:15:03 +0000 | 
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-09-19 12:15:03 +0000 | 
| commit | e10aa8891182378de002bd71bf4d04181fb8231d (patch) | |
| tree | 5ffb3b69a8c42cacb434c2cd767a44027884d261 | |
| parent | 2f4dfc753fd86c672aa4145940db075a8a149f17 (diff) | |
| parent | df04be8cf744b500b5972fdc76898a96067d5517 (diff) | |
| download | rust-e10aa8891182378de002bd71bf4d04181fb8231d.tar.gz rust-e10aa8891182378de002bd71bf4d04181fb8231d.zip | |
Auto merge of #145737 - cjgillot:gvn-valueset, r=saethlin
GVN: stop hashing opaque values GVN generates values that are not meant to be unified with any other. For instance `Opaque` (aka we don't know anything), non-deterministic constants and borrows. The current algorithm generates a unique index, so the generated `Value` will be different from all the existing. This is wasteful, as we should not hash that `Value` at all. This PR proposes to do this. This involves partially reimplementing a `FxIndexSet`, but yields a small but consistent perf improvement (https://github.com/rust-lang/rust/pull/145737#issuecomment-3276951054).
| -rw-r--r-- | Cargo.lock | 1 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/Cargo.toml | 1 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/gvn.rs | 222 | 
3 files changed, 180 insertions, 44 deletions
| diff --git a/Cargo.lock b/Cargo.lock index 4d1c6d4c46b..6ff52d500f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4235,6 +4235,7 @@ name = "rustc_mir_transform" version = "0.0.0" dependencies = [ "either", + "hashbrown", "itertools", "rustc_abi", "rustc_arena", diff --git a/compiler/rustc_mir_transform/Cargo.toml b/compiler/rustc_mir_transform/Cargo.toml index 08c43a4648c..511c1960e40 100644 --- a/compiler/rustc_mir_transform/Cargo.toml +++ b/compiler/rustc_mir_transform/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start either = "1" +hashbrown = "0.15" itertools = "0.12" rustc_abi = { path = "../rustc_abi" } rustc_arena = { path = "../rustc_arena" } diff --git a/compiler/rustc_mir_transform/src/gvn.rs b/compiler/rustc_mir_transform/src/gvn.rs index 30e68a3f650..ebec3d12500 100644 --- a/compiler/rustc_mir_transform/src/gvn.rs +++ b/compiler/rustc_mir_transform/src/gvn.rs @@ -85,8 +85,10 @@ //! that contain `AllocId`s. use std::borrow::Cow; +use std::hash::{Hash, Hasher}; use either::Either; +use hashbrown::hash_table::{Entry, HashTable}; use itertools::Itertools as _; use rustc_abi::{self as abi, BackendRepr, FIRST_VARIANT, FieldIdx, Primitive, Size, VariantIdx}; use rustc_const_eval::const_eval::DummyMachine; @@ -94,7 +96,7 @@ use rustc_const_eval::interpret::{ ImmTy, Immediate, InterpCx, MemPlaceMeta, MemoryKind, OpTy, Projectable, Scalar, intern_const_alloc_for_constprop, }; -use rustc_data_structures::fx::{FxIndexSet, MutableValues}; +use rustc_data_structures::fx::FxHasher; use rustc_data_structures::graph::dominators::Dominators; use rustc_hir::def::DefKind; use rustc_index::bit_set::DenseBitSet; @@ -152,9 +154,29 @@ impl<'tcx> crate::MirPass<'tcx> for GVN { } newtype_index! { + /// This represents a `Value` in the symbolic execution. + #[debug_format = "_v{}"] struct VnIndex {} } +/// Marker type to forbid hashing and comparing opaque values. +/// This struct should only be constructed by `ValueSet::insert_unique` to ensure we use that +/// method to create non-unifiable values. It will ICE if used in `ValueSet::insert`. +#[derive(Copy, Clone, Debug, Eq)] +struct VnOpaque; +impl PartialEq for VnOpaque { + fn eq(&self, _: &VnOpaque) -> bool { + // ICE if we try to compare unique values + unreachable!() + } +} +impl Hash for VnOpaque { + fn hash<T: Hasher>(&self, _: &mut T) { + // ICE if we try to hash unique values + unreachable!() + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] enum AddressKind { Ref(BorrowKind), @@ -166,15 +188,17 @@ enum Value<'tcx> { // Root values. /// Used to represent values we know nothing about. /// The `usize` is a counter incremented by `new_opaque`. - Opaque(usize), + Opaque(VnOpaque), /// Evaluated or unevaluated constant value. Constant { value: Const<'tcx>, /// Some constants do not have a deterministic value. To avoid merging two instances of the /// same `Const`, we assign them an additional integer index. - // `disambiguator` is 0 iff the constant is deterministic. - disambiguator: usize, + // `disambiguator` is `None` iff the constant is deterministic. + disambiguator: Option<VnOpaque>, }, + + // Aggregates. /// An aggregate value, either tuple/closure/struct/enum. /// This does not contain unions, as we cannot reason with the value. Aggregate(VariantIdx, Vec<VnIndex>), @@ -192,7 +216,7 @@ enum Value<'tcx> { place: Place<'tcx>, kind: AddressKind, /// Give each borrow and pointer a different provenance, so we don't merge them. - provenance: usize, + provenance: VnOpaque, }, // Extractions. @@ -211,6 +235,107 @@ enum Value<'tcx> { }, } +/// Stores and deduplicates pairs of `(Value, Ty)` into in `VnIndex` numbered values. +/// +/// This data structure is mostly a partial reimplementation of `FxIndexMap<VnIndex, (Value, Ty)>`. +/// We do not use a regular `FxIndexMap` to skip hashing values that are unique by construction, +/// like opaque values, address with provenance and non-deterministic constants. +struct ValueSet<'tcx> { + indices: HashTable<VnIndex>, + hashes: IndexVec<VnIndex, u64>, + values: IndexVec<VnIndex, Value<'tcx>>, + types: IndexVec<VnIndex, Ty<'tcx>>, +} + +impl<'tcx> ValueSet<'tcx> { + fn new(num_values: usize) -> ValueSet<'tcx> { + ValueSet { + indices: HashTable::with_capacity(num_values), + hashes: IndexVec::with_capacity(num_values), + values: IndexVec::with_capacity(num_values), + types: IndexVec::with_capacity(num_values), + } + } + + /// Insert a `(Value, Ty)` pair without hashing or deduplication. + /// This always creates a new `VnIndex`. + #[inline] + fn insert_unique( + &mut self, + ty: Ty<'tcx>, + value: impl FnOnce(VnOpaque) -> Value<'tcx>, + ) -> VnIndex { + let value = value(VnOpaque); + + debug_assert!(match value { + Value::Opaque(_) | Value::Address { .. } => true, + Value::Constant { disambiguator, .. } => disambiguator.is_some(), + _ => false, + }); + + let index = self.hashes.push(0); + let _index = self.types.push(ty); + debug_assert_eq!(index, _index); + let _index = self.values.push(value); + debug_assert_eq!(index, _index); + index + } + + /// Insert a `(Value, Ty)` pair to be deduplicated. + /// Returns `true` as second tuple field if this value did not exist previously. + #[allow(rustc::pass_by_value)] // closures take `&VnIndex` + fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> (VnIndex, bool) { + debug_assert!(match value { + Value::Opaque(_) | Value::Address { .. } => false, + Value::Constant { disambiguator, .. } => disambiguator.is_none(), + _ => true, + }); + + let hash: u64 = { + let mut h = FxHasher::default(); + value.hash(&mut h); + ty.hash(&mut h); + h.finish() + }; + + let eq = |index: &VnIndex| self.values[*index] == value && self.types[*index] == ty; + let hasher = |index: &VnIndex| self.hashes[*index]; + match self.indices.entry(hash, eq, hasher) { + Entry::Occupied(entry) => { + let index = *entry.get(); + (index, false) + } + Entry::Vacant(entry) => { + let index = self.hashes.push(hash); + entry.insert(index); + let _index = self.values.push(value); + debug_assert_eq!(index, _index); + let _index = self.types.push(ty); + debug_assert_eq!(index, _index); + (index, true) + } + } + } + + /// Return the `Value` associated with the given `VnIndex`. + #[inline] + fn value(&self, index: VnIndex) -> &Value<'tcx> { + &self.values[index] + } + + /// Return the type associated with the given `VnIndex`. + #[inline] + fn ty(&self, index: VnIndex) -> Ty<'tcx> { + self.types[index] + } + + /// Replace the value associated with `index` with an opaque value. + #[inline] + fn forget(&mut self, index: VnIndex) { + self.values[index] = Value::Opaque(VnOpaque); + } +} + struct VnState<'body, 'tcx> { tcx: TyCtxt<'tcx>, ecx: InterpCx<'tcx, DummyMachine>, @@ -221,11 +346,9 @@ struct VnState<'body, 'tcx> { /// Locals that are assigned that value. // This vector does not hold all the values of `VnIndex` that we create. rev_locals: IndexVec<VnIndex, SmallVec<[Local; 1]>>, - values: FxIndexSet<(Value<'tcx>, Ty<'tcx>)>, + values: ValueSet<'tcx>, /// Values evaluated as constants if possible. evaluated: IndexVec<VnIndex, Option<OpTy<'tcx>>>, - /// Counter to generate different values. - next_opaque: usize, /// Cache the deref values. derefs: Vec<VnIndex>, ssa: &'body SsaLocals, @@ -256,9 +379,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { is_coroutine: body.coroutine.is_some(), locals: IndexVec::from_elem(None, local_decls), rev_locals: IndexVec::with_capacity(num_values), - values: FxIndexSet::with_capacity_and_hasher(num_values, Default::default()), + values: ValueSet::new(num_values), evaluated: IndexVec::with_capacity(num_values), - next_opaque: 1, derefs: Vec::new(), ssa, dominators, @@ -272,8 +394,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { #[instrument(level = "trace", skip(self), ret)] fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> VnIndex { - let (index, new) = self.values.insert_full((value, ty)); - let index = VnIndex::from_usize(index); + let (index, new) = self.values.insert(ty, value); if new { // Grow `evaluated` and `rev_locals` here to amortize the allocations. let evaluated = self.eval_to_const(index); @@ -285,18 +406,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { index } - fn next_opaque(&mut self) -> usize { - let next_opaque = self.next_opaque; - self.next_opaque += 1; - next_opaque - } - /// Create a new `Value` for which we have no information at all, except that it is distinct /// from all the others. #[instrument(level = "trace", skip(self), ret)] fn new_opaque(&mut self, ty: Ty<'tcx>) -> VnIndex { - let value = Value::Opaque(self.next_opaque()); - self.insert(ty, value) + let index = self.values.insert_unique(ty, Value::Opaque); + let _index = self.evaluated.push(None); + debug_assert_eq!(index, _index); + let _index = self.rev_locals.push(SmallVec::new()); + debug_assert_eq!(index, _index); + index } /// Create a new `Value::Address` distinct from all the others. @@ -309,18 +428,49 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { } AddressKind::Address(mutbl) => Ty::new_ptr(self.tcx, pty, mutbl.to_mutbl_lossy()), }; - let value = Value::Address { place, kind, provenance: self.next_opaque() }; - self.insert(ty, value) + let index = + self.values.insert_unique(ty, |provenance| Value::Address { place, kind, provenance }); + let evaluated = self.eval_to_const(index); + let _index = self.evaluated.push(evaluated); + debug_assert_eq!(index, _index); + let _index = self.rev_locals.push(SmallVec::new()); + debug_assert_eq!(index, _index); + index + } + + #[instrument(level = "trace", skip(self), ret)] + fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex { + let (index, new) = if value.is_deterministic() { + // The constant is deterministic, no need to disambiguate. + let constant = Value::Constant { value, disambiguator: None }; + self.values.insert(value.ty(), constant) + } else { + // Multiple mentions of this constant will yield different values, + // so assign a different `disambiguator` to ensure they do not get the same `VnIndex`. + let index = self.values.insert_unique(value.ty(), |disambiguator| Value::Constant { + value, + disambiguator: Some(disambiguator), + }); + (index, true) + }; + if new { + let evaluated = self.eval_to_const(index); + let _index = self.evaluated.push(evaluated); + debug_assert_eq!(index, _index); + let _index = self.rev_locals.push(SmallVec::new()); + debug_assert_eq!(index, _index); + } + index } #[inline] fn get(&self, index: VnIndex) -> &Value<'tcx> { - &self.values.get_index(index.as_usize()).unwrap().0 + self.values.value(index) } #[inline] fn ty(&self, index: VnIndex) -> Ty<'tcx> { - self.values.get_index(index.as_usize()).unwrap().1 + self.values.ty(index) } /// Record that `local` is assigned `value`. `local` must be SSA. @@ -331,33 +481,18 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { self.rev_locals[value].push(local); } - fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex { - let disambiguator = if value.is_deterministic() { - // The constant is deterministic, no need to disambiguate. - 0 - } else { - // Multiple mentions of this constant will yield different values, - // so assign a different `disambiguator` to ensure they do not get the same `VnIndex`. - let disambiguator = self.next_opaque(); - // `disambiguator: 0` means deterministic. - debug_assert_ne!(disambiguator, 0); - disambiguator - }; - self.insert(value.ty(), Value::Constant { value, disambiguator }) - } - fn insert_bool(&mut self, flag: bool) -> VnIndex { // Booleans are deterministic. let value = Const::from_bool(self.tcx, flag); debug_assert!(value.is_deterministic()); - self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: 0 }) + self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: None }) } fn insert_scalar(&mut self, ty: Ty<'tcx>, scalar: Scalar) -> VnIndex { // Scalars are deterministic. let value = Const::from_scalar(self.tcx, scalar, ty); debug_assert!(value.is_deterministic()); - self.insert(ty, Value::Constant { value, disambiguator: 0 }) + self.insert(ty, Value::Constant { value, disambiguator: None }) } fn insert_tuple(&mut self, ty: Ty<'tcx>, values: Vec<VnIndex>) -> VnIndex { @@ -372,8 +507,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> { fn invalidate_derefs(&mut self) { for deref in std::mem::take(&mut self.derefs) { - let opaque = self.next_opaque(); - self.values.get_index_mut2(deref.index()).unwrap().0 = Value::Opaque(opaque); + self.values.forget(deref); } } @@ -1572,7 +1706,7 @@ impl<'tcx> VnState<'_, 'tcx> { // This was already constant in MIR, do not change it. If the constant is not // deterministic, adding an additional mention of it in MIR will not give the same value as // the former mention. - if let Value::Constant { value, disambiguator: 0 } = *self.get(index) { + if let Value::Constant { value, disambiguator: None } = *self.get(index) { debug_assert!(value.is_deterministic()); return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: value }); } | 
