about summary refs log tree commit diff
path: root/compiler/rustc_const_eval/src
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2025-02-13 15:27:30 +0000
committer bors <bors@rust-lang.org> 2025-02-13 15:27:30 +0000
commit c241e146506600f5ab7f4026ff015df8a658400e (patch)
tree 9f5ad2f1e26d9fa95ae7dad05b698d5ae70e5a6b /compiler/rustc_const_eval/src
parent 54cdc751df770517e70db0588573e32e6a7b9821 (diff)
parent b722d5da1d9b9eb86b1bdb5d9e7a820d035b0b23 (diff)
download rust-c241e146506600f5ab7f4026ff015df8a658400e.tar.gz
rust-c241e146506600f5ab7f4026ff015df8a658400e.zip
Auto merge of #136593 - lukas-code:ty-value-perf, r=oli-obk
valtree performance tuning

Summary: This PR makes type checking of code with many type-level constants faster.

After https://github.com/rust-lang/rust/pull/136180 was merged, we observed a small perf regression (https://github.com/rust-lang/rust/pull/136318#issuecomment-2635562821). This happened because that PR introduced additional copies in the fast reject code path for consts, which is very hot for certain crates: https://github.com/rust-lang/rust/blob/6c1d960d88dd3755548b3818630acb63fa98187e/compiler/rustc_type_ir/src/fast_reject.rs#L486-L487

This PR improves the performance again by properly interning the valtrees so that copying and comparing them becomes faster. This will become especially useful with `feature(adt_const_params)`, so the fast reject code doesn't have to do a deep compare of the valtrees.

Note that we can't just compare the interned consts themselves in the fast reject, because sometimes `'static` lifetimes in the type are replaced with inference variables (due to canonicalization) on one side but not the other.

A less invasive alternative that I considered is simply avoiding the copies introduced by https://github.com/rust-lang/rust/pull/136180 and comparing the valtrees in-place (see commit: https://github.com/rust-lang/rust/commit/9e91e50ac5920f0b9b4a3b1e0880c85336ba5c64 / perf results: https://github.com/rust-lang/rust/pull/136593#issuecomment-2642303245); however, that was still measurably slower than interning.

There are some minor regressions in secondary benchmarks: These happen due to changes in memory allocations and seem acceptable to me. The crates that make heavy use of valtrees show no significant changes in memory usage.
Diffstat (limited to 'compiler/rustc_const_eval/src')
-rw-r--r--compiler/rustc_const_eval/src/const_eval/valtrees.rs48
1 files changed, 21 insertions, 27 deletions
diff --git a/compiler/rustc_const_eval/src/const_eval/valtrees.rs b/compiler/rustc_const_eval/src/const_eval/valtrees.rs
index c35910a706b..3776fb55c2e 100644
--- a/compiler/rustc_const_eval/src/const_eval/valtrees.rs
+++ b/compiler/rustc_const_eval/src/const_eval/valtrees.rs
@@ -2,7 +2,7 @@ use rustc_abi::{BackendRepr, VariantIdx};
 use rustc_data_structures::stack::ensure_sufficient_stack;
 use rustc_middle::mir::interpret::{EvalToValTreeResult, GlobalId, ReportedErrorInfo};
 use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout};
-use rustc_middle::ty::{self, ScalarInt, Ty, TyCtxt};
+use rustc_middle::ty::{self, Ty, TyCtxt};
 use rustc_middle::{bug, mir};
 use rustc_span::DUMMY_SP;
 use tracing::{debug, instrument, trace};
@@ -21,7 +21,7 @@ use crate::interpret::{
 fn branches<'tcx>(
     ecx: &CompileTimeInterpCx<'tcx>,
     place: &MPlaceTy<'tcx>,
-    n: usize,
+    field_count: usize,
     variant: Option<VariantIdx>,
     num_nodes: &mut usize,
 ) -> ValTreeCreationResult<'tcx> {
@@ -29,30 +29,28 @@ fn branches<'tcx>(
         Some(variant) => ecx.project_downcast(place, variant).unwrap(),
         None => place.clone(),
     };
-    let variant = variant.map(|variant| Some(ty::ValTree::Leaf(ScalarInt::from(variant.as_u32()))));
-    debug!(?place, ?variant);
+    debug!(?place);
 
-    let mut fields = Vec::with_capacity(n);
-    for i in 0..n {
-        let field = ecx.project_field(&place, i).unwrap();
-        let valtree = const_to_valtree_inner(ecx, &field, num_nodes)?;
-        fields.push(Some(valtree));
-    }
+    let mut branches = Vec::with_capacity(field_count + variant.is_some() as usize);
 
     // For enums, we prepend their variant index before the variant's fields so we can figure out
     // the variant again when just seeing a valtree.
-    let branches = variant
-        .into_iter()
-        .chain(fields.into_iter())
-        .collect::<Option<Vec<_>>>()
-        .expect("should have already checked for errors in ValTree creation");
+    if let Some(variant) = variant {
+        branches.push(ty::ValTree::from_scalar_int(*ecx.tcx, variant.as_u32().into()));
+    }
+
+    for i in 0..field_count {
+        let field = ecx.project_field(&place, i).unwrap();
+        let valtree = const_to_valtree_inner(ecx, &field, num_nodes)?;
+        branches.push(valtree);
+    }
 
     // Have to account for ZSTs here
     if branches.len() == 0 {
         *num_nodes += 1;
     }
 
-    Ok(ty::ValTree::Branch(ecx.tcx.arena.alloc_from_iter(branches)))
+    Ok(ty::ValTree::from_branches(*ecx.tcx, branches))
 }
 
 #[instrument(skip(ecx), level = "debug")]
@@ -70,7 +68,7 @@ fn slice_branches<'tcx>(
         elems.push(valtree);
     }
 
-    Ok(ty::ValTree::Branch(ecx.tcx.arena.alloc_from_iter(elems)))
+    Ok(ty::ValTree::from_branches(*ecx.tcx, elems))
 }
 
 #[instrument(skip(ecx), level = "debug")]
@@ -79,6 +77,7 @@ fn const_to_valtree_inner<'tcx>(
     place: &MPlaceTy<'tcx>,
     num_nodes: &mut usize,
 ) -> ValTreeCreationResult<'tcx> {
+    let tcx = *ecx.tcx;
     let ty = place.layout.ty;
     debug!("ty kind: {:?}", ty.kind());
 
@@ -89,14 +88,14 @@ fn const_to_valtree_inner<'tcx>(
     match ty.kind() {
         ty::FnDef(..) => {
             *num_nodes += 1;
-            Ok(ty::ValTree::zst())
+            Ok(ty::ValTree::zst(tcx))
         }
         ty::Bool | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Char => {
             let val = ecx.read_immediate(place).unwrap();
             let val = val.to_scalar_int().unwrap();
             *num_nodes += 1;
 
-            Ok(ty::ValTree::Leaf(val))
+            Ok(ty::ValTree::from_scalar_int(tcx, val))
         }
 
         ty::Pat(base, ..) => {
@@ -127,7 +126,7 @@ fn const_to_valtree_inner<'tcx>(
                 return Err(ValTreeCreationError::NonSupportedType(ty));
             };
             // It's just a ScalarInt!
-            Ok(ty::ValTree::Leaf(val))
+            Ok(ty::ValTree::from_scalar_int(tcx, val))
         }
 
         // Technically we could allow function pointers (represented as `ty::Instance`), but this is not guaranteed to
@@ -287,16 +286,11 @@ pub fn valtree_to_const_value<'tcx>(
     // FIXME: Does this need an example?
     match *cv.ty.kind() {
         ty::FnDef(..) => {
-            assert!(cv.valtree.unwrap_branch().is_empty());
+            assert!(cv.valtree.is_zst());
             mir::ConstValue::ZeroSized
         }
         ty::Bool | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Char | ty::RawPtr(_, _) => {
-            match cv.valtree {
-                ty::ValTree::Leaf(scalar_int) => mir::ConstValue::Scalar(Scalar::Int(scalar_int)),
-                ty::ValTree::Branch(_) => bug!(
-                    "ValTrees for Bool, Int, Uint, Float, Char or RawPtr should have the form ValTree::Leaf"
-                ),
-            }
+            mir::ConstValue::Scalar(Scalar::Int(cv.valtree.unwrap_leaf()))
         }
         ty::Pat(ty, _) => {
             let cv = ty::Value { valtree: cv.valtree, ty };