diff options
Diffstat (limited to 'compiler/rustc_pattern_analysis/src/usefulness.rs')
| -rw-r--r-- | compiler/rustc_pattern_analysis/src/usefulness.rs | 278 |
1 files changed, 220 insertions, 58 deletions
diff --git a/compiler/rustc_pattern_analysis/src/usefulness.rs b/compiler/rustc_pattern_analysis/src/usefulness.rs index 6b1de807797..b51b1a1f722 100644 --- a/compiler/rustc_pattern_analysis/src/usefulness.rs +++ b/compiler/rustc_pattern_analysis/src/usefulness.rs @@ -300,6 +300,166 @@ //! //! //! +//! # `Missing` and relevancy +//! +//! ## Relevant values +//! +//! Take the following example: +//! +//! ```compile_fail,E0004 +//! # let foo = (true, true); +//! match foo { +//! (true, _) => 1, +//! (_, true) => 2, +//! }; +//! ``` +//! +//! Consider the value `(true, true)`: +//! - Row 2 does not distinguish `(true, true)` and `(false, true)`; +//! - `false` does not show up in the first column of the match, so without knowing anything else we +//! can deduce that `(false, true)` matches the same or fewer rows than `(true, true)`. +//! +//! Using those two facts together, we deduce that `(true, true)` will not give us more usefulness +//! information about row 2 than `(false, true)` would. We say that "`(true, true)` is made +//! irrelevant for row 2 by `(false, true)`". We will use this idea to prune the search tree. +//! +//! +//! ## Computing relevancy +//! +//! We now generalize from the above example to approximate relevancy in a simple way. Note that we +//! will only compute an approximation: we can sometimes determine when a case is irrelevant, but +//! computing this precisely is at least as hard as computing usefulness. +//! +//! Our computation of relevancy relies on the `Missing` constructor. As explained in +//! [`crate::constructor`], `Missing` represents the constructors not present in a given column. For +//! example in the following: +//! +//! ```compile_fail,E0004 +//! enum Direction { North, South, East, West } +//! # let wind = (Direction::North, 0u8); +//! match wind { +//! (Direction::North, _) => 1, +//! (_, 50..) => 2, +//! }; +//! ``` +//! +//! Here `South`, `East` and `West` are missing in the first column, and `0..50` is missing in the +//! second. Both of these sets are represented by `Constructor::Missing` in their corresponding +//! column. +//! +//! We then compute relevancy as follows: during the course of the algorithm, for a row `r`: +//! - if `r` has a wildcard in the first column; +//! - and some constructors are missing in that column; +//! - then any `c != Missing` is considered irrelevant for row `r`. +//! +//! By this we mean that continuing the algorithm by specializing with `c` is guaranteed not to +//! contribute more information about the usefulness of row `r` than what we would get by +//! specializing with `Missing`. The argument is the same as in the previous subsection. +//! +//! Once we've specialized by a constructor `c` that is irrelevant for row `r`, we're guaranteed to +//! only explore values irrelevant for `r`. If we then ever reach a point where we're only exploring +//! values that are irrelevant to all of the rows (including the virtual wildcard row used for +//! exhaustiveness), we skip that case entirely. +//! +//! +//! ## Example +//! +//! Let's go through a variation on the first example: +//! +//! ```compile_fail,E0004 +//! # let foo = (true, true, true); +//! match foo { +//! (true, _, true) => 1, +//! (_, true, _) => 2, +//! }; +//! ``` +//! +//! ```text +//! ┐ Patterns: +//! │ 1. `[(true, _, true)]` +//! │ 2. `[(_, true, _)]` +//! │ 3. `[_]` // virtual extra wildcard row +//! │ +//! │ Specialize with `(,,)`: +//! ├─┐ Patterns: +//! │ │ 1. `[true, _, true]` +//! │ │ 2. `[_, true, _]` +//! │ │ 3. `[_, _, _]` +//! │ │ +//! │ │ There are missing constructors in the first column (namely `false`), hence +//! │ │ `true` is irrelevant for rows 2 and 3. +//! │ │ +//! │ │ Specialize with `true`: +//! │ ├─┐ Patterns: +//! │ │ │ 1. `[_, true]` +//! │ │ │ 2. `[true, _]` // now exploring irrelevant cases +//! │ │ │ 3. `[_, _]` // now exploring irrelevant cases +//! │ │ │ +//! │ │ │ There are missing constructors in the first column (namely `false`), hence +//! │ │ │ `true` is irrelevant for rows 1 and 3. +//! │ │ │ +//! │ │ │ Specialize with `true`: +//! │ │ ├─┐ Patterns: +//! │ │ │ │ 1. `[true]` // now exploring irrelevant cases +//! │ │ │ │ 2. `[_]` // now exploring irrelevant cases +//! │ │ │ │ 3. `[_]` // now exploring irrelevant cases +//! │ │ │ │ +//! │ │ │ │ The current case is irrelevant for all rows: we backtrack immediately. +//! │ │ ├─┘ +//! │ │ │ +//! │ │ │ Specialize with `false`: +//! │ │ ├─┐ Patterns: +//! │ │ │ │ 1. `[true]` +//! │ │ │ │ 3. `[_]` // now exploring irrelevant cases +//! │ │ │ │ +//! │ │ │ │ Specialize with `true`: +//! │ │ │ ├─┐ Patterns: +//! │ │ │ │ │ 1. `[]` +//! │ │ │ │ │ 3. `[]` // now exploring irrelevant cases +//! │ │ │ │ │ +//! │ │ │ │ │ Row 1 is therefore useful. +//! │ │ │ ├─┘ +//! <etc...> +//! ``` +//! +//! Relevancy allowed us to skip the case `(true, true, _)` entirely. In some cases this pruning can +//! give drastic speedups. The case this was built for is the following (#118437): +//! +//! ```ignore(illustrative) +//! match foo { +//! (true, _, _, _, ..) => 1, +//! (_, true, _, _, ..) => 2, +//! (_, _, true, _, ..) => 3, +//! (_, _, _, true, ..) => 4, +//! ... +//! } +//! ``` +//! +//! Without considering relevancy, we would explore all 2^n combinations of the `true` and `Missing` +//! constructors. Relevancy tells us that e.g. `(true, true, false, false, false, ...)` is +//! irrelevant for all the rows. This allows us to skip all cases with more than one `true` +//! constructor, changing the runtime from exponential to linear. +//! +//! +//! ## Relevancy and exhaustiveness +//! +//! For exhaustiveness, we do something slightly different w.r.t relevancy: we do not report +//! witnesses of non-exhaustiveness that are irrelevant for the virtual wildcard row. For example, +//! in: +//! +//! ```ignore(illustrative) +//! match foo { +//! (true, true) => {} +//! } +//! ``` +//! +//! we only report `(false, _)` as missing. This was a deliberate choice made early in the +//! development of rust, for diagnostic and performance purposes. As showed in the previous section, +//! ignoring irrelevant cases preserves usefulness, so this choice still correctly computes whether +//! a match is exhaustive. +//! +//! +//! //! # Or-patterns //! //! What we have described so far works well if there are no or-patterns. To handle them, if the @@ -569,8 +729,10 @@ pub fn ensure_sufficient_stack<R>(f: impl FnOnce() -> R) -> R { } /// Context that provides information local to a place under investigation. -#[derive(Clone)] +#[derive(derivative::Derivative)] +#[derivative(Debug(bound = ""), Clone(bound = ""), Copy(bound = ""))] pub(crate) struct PlaceCtxt<'a, 'p, Cx: TypeCx> { + #[derivative(Debug = "ignore")] pub(crate) mcx: MatchCtxt<'a, 'p, Cx>, /// Type of the place under investigation. pub(crate) ty: Cx::Ty, @@ -596,14 +758,6 @@ impl<'a, 'p, Cx: TypeCx> PlaceCtxt<'a, 'p, Cx> { } } -impl<'a, 'p, Cx: TypeCx> Copy for PlaceCtxt<'a, 'p, Cx> {} - -impl<'a, 'p, Cx: TypeCx> fmt::Debug for PlaceCtxt<'a, 'p, Cx> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("PlaceCtxt").field("ty", &self.ty).finish() - } -} - /// Serves two purposes: /// - in a wildcard, tracks whether the wildcard matches only valid values (i.e. is a binding `_a`) /// or also invalid values (i.e. is a true `_` pattern). @@ -670,15 +824,20 @@ impl fmt::Display for ValidityConstraint { // - 'a allocated by us // - 'p coming from the input // - Cx global compilation context -#[derive(Clone)] +#[derive(derivative::Derivative)] +#[derivative(Clone(bound = ""))] struct PatStack<'a, 'p, Cx: TypeCx> { // Rows of len 1 are very common, which is why `SmallVec[_; 2]` works well. pats: SmallVec<[&'a DeconstructedPat<'p, Cx>; 2]>, + /// Sometimes we know that as far as this row is concerned, the current case is already handled + /// by a different, more general, case. When the case is irrelevant for all rows this allows us + /// to skip a case entirely. This is purely an optimization. See at the top for details. + relevant: bool, } impl<'a, 'p, Cx: TypeCx> PatStack<'a, 'p, Cx> { fn from_pattern(pat: &'a DeconstructedPat<'p, Cx>) -> Self { - PatStack { pats: smallvec![pat] } + PatStack { pats: smallvec![pat], relevant: true } } fn is_empty(&self) -> bool { @@ -713,12 +872,17 @@ impl<'a, 'p, Cx: TypeCx> PatStack<'a, 'p, Cx> { &self, pcx: &PlaceCtxt<'a, 'p, Cx>, ctor: &Constructor<Cx>, + ctor_is_relevant: bool, ) -> PatStack<'a, 'p, Cx> { // We pop the head pattern and push the new fields extracted from the arguments of // `self.head()`. let mut new_pats = self.head().specialize(pcx, ctor); new_pats.extend_from_slice(&self.pats[1..]); - PatStack { pats: new_pats } + // `ctor` is relevant for this row if it is the actual constructor of this row, or if the + // row has a wildcard and `ctor` is relevant for wildcards. + let ctor_is_relevant = + !matches!(self.head().ctor(), Constructor::Wildcard) || ctor_is_relevant; + PatStack { pats: new_pats, relevant: self.relevant && ctor_is_relevant } } } @@ -784,10 +948,11 @@ impl<'a, 'p, Cx: TypeCx> MatrixRow<'a, 'p, Cx> { &self, pcx: &PlaceCtxt<'a, 'p, Cx>, ctor: &Constructor<Cx>, + ctor_is_relevant: bool, parent_row: usize, ) -> MatrixRow<'a, 'p, Cx> { MatrixRow { - pats: self.pats.pop_head_constructor(pcx, ctor), + pats: self.pats.pop_head_constructor(pcx, ctor, ctor_is_relevant), parent_row, is_under_guard: self.is_under_guard, useful: false, @@ -845,8 +1010,7 @@ impl<'a, 'p, Cx: TypeCx> Matrix<'a, 'p, Cx> { scrut_ty: Cx::Ty, scrut_validity: ValidityConstraint, ) -> Self { - let wild_pattern = - wildcard_arena.alloc(DeconstructedPat::wildcard(scrut_ty, Default::default())); + let wild_pattern = wildcard_arena.alloc(DeconstructedPat::wildcard(scrut_ty)); let wildcard_row = PatStack::from_pattern(wild_pattern); let mut matrix = Matrix { rows: Vec::with_capacity(arms.len()), @@ -865,24 +1029,14 @@ impl<'a, 'p, Cx: TypeCx> Matrix<'a, 'p, Cx> { matrix } - fn head_ty(&self) -> Option<Cx::Ty> { + fn head_ty(&self, mcx: MatchCtxt<'a, 'p, Cx>) -> Option<Cx::Ty> { if self.column_count() == 0 { return None; } - let mut ty = self.wildcard_row.head().ty(); - // If the type is opaque and it is revealed anywhere in the column, we take the revealed - // version. Otherwise we could encounter constructors for the revealed type and crash. - if Cx::is_opaque_ty(ty) { - for pat in self.heads() { - let pat_ty = pat.ty(); - if !Cx::is_opaque_ty(pat_ty) { - ty = pat_ty; - break; - } - } - } - Some(ty) + let ty = self.wildcard_row.head().ty(); + // FIXME(Nadrieril): `Cx` should only give us revealed types. + Some(mcx.tycx.reveal_opaque_ty(ty)) } fn column_count(&self) -> usize { self.wildcard_row.len() @@ -913,8 +1067,9 @@ impl<'a, 'p, Cx: TypeCx> Matrix<'a, 'p, Cx> { &self, pcx: &PlaceCtxt<'a, 'p, Cx>, ctor: &Constructor<Cx>, + ctor_is_relevant: bool, ) -> Matrix<'a, 'p, Cx> { - let wildcard_row = self.wildcard_row.pop_head_constructor(pcx, ctor); + let wildcard_row = self.wildcard_row.pop_head_constructor(pcx, ctor, ctor_is_relevant); let new_validity = self.place_validity[0].specialize(ctor); let new_place_validity = std::iter::repeat(new_validity) .take(ctor.arity(pcx)) @@ -924,7 +1079,7 @@ impl<'a, 'p, Cx: TypeCx> Matrix<'a, 'p, Cx> { Matrix { rows: Vec::new(), wildcard_row, place_validity: new_place_validity }; for (i, row) in self.rows().enumerate() { if ctor.is_covered_by(pcx, row.head().ctor()) { - let new_row = row.pop_head_constructor(pcx, ctor, i); + let new_row = row.pop_head_constructor(pcx, ctor, ctor_is_relevant, i); matrix.expand_and_push(new_row); } } @@ -1032,7 +1187,8 @@ impl<'a, 'p, Cx: TypeCx> fmt::Debug for Matrix<'a, 'p, Cx> { /// The final `Pair(Some(_), true)` is then the resulting witness. /// /// See the top of the file for more detailed explanations and examples. -#[derive(Debug, Clone)] +#[derive(derivative::Derivative)] +#[derivative(Debug(bound = ""), Clone(bound = ""))] struct WitnessStack<Cx: TypeCx>(Vec<WitnessPat<Cx>>); impl<Cx: TypeCx> WitnessStack<Cx> { @@ -1079,7 +1235,8 @@ impl<Cx: TypeCx> WitnessStack<Cx> { /// /// Just as the `Matrix` starts with a single column, by the end of the algorithm, this has a single /// column, which contains the patterns that are missing for the match to be exhaustive. -#[derive(Debug, Clone)] +#[derive(derivative::Derivative)] +#[derivative(Debug(bound = ""), Clone(bound = ""))] struct WitnessMatrix<Cx: TypeCx>(Vec<WitnessStack<Cx>>); impl<Cx: TypeCx> WitnessMatrix<Cx> { @@ -1122,7 +1279,10 @@ impl<Cx: TypeCx> WitnessMatrix<Cx> { if matches!(ctor, Constructor::Missing) { // We got the special `Missing` constructor that stands for the constructors not present // in the match. - if !report_individual_missing_ctors { + if missing_ctors.is_empty() { + // Nothing to report. + *self = Self::empty(); + } else if !report_individual_missing_ctors { // Report `_` as missing. let pat = WitnessPat::wild_from_ctor(pcx, Constructor::Wildcard); self.push_pattern(pat); @@ -1181,7 +1341,14 @@ fn compute_exhaustiveness_and_usefulness<'a, 'p, Cx: TypeCx>( ) -> WitnessMatrix<Cx> { debug_assert!(matrix.rows().all(|r| r.len() == matrix.column_count())); - let Some(ty) = matrix.head_ty() else { + if !matrix.wildcard_row.relevant && matrix.rows().all(|r| !r.pats.relevant) { + // Here we know that nothing will contribute further to exhaustiveness or usefulness. This + // is purely an optimization: skipping this check doesn't affect correctness. See the top of + // the file for details. + return WitnessMatrix::empty(); + } + + let Some(ty) = matrix.head_ty(mcx) else { // The base case: there are no columns in the matrix. We are morally pattern-matching on (). // A row is useful iff it has no (unguarded) rows above it. for row in matrix.rows_mut() { @@ -1193,8 +1360,14 @@ fn compute_exhaustiveness_and_usefulness<'a, 'p, Cx: TypeCx>( return WitnessMatrix::empty(); } } - // No (unguarded) rows, so the match is not exhaustive. We return a new witness. - return WitnessMatrix::unit_witness(); + // No (unguarded) rows, so the match is not exhaustive. We return a new witness unless + // irrelevant. + return if matrix.wildcard_row.relevant { + WitnessMatrix::unit_witness() + } else { + // We choose to not report anything here; see at the top for details. + WitnessMatrix::empty() + }; }; debug!("ty: {ty:?}"); @@ -1237,32 +1410,21 @@ fn compute_exhaustiveness_and_usefulness<'a, 'p, Cx: TypeCx>( let mut ret = WitnessMatrix::empty(); for ctor in split_ctors { - debug!("specialize({:?})", ctor); // Dig into rows that match `ctor`. - let mut spec_matrix = matrix.specialize_constructor(pcx, &ctor); + debug!("specialize({:?})", ctor); + // `ctor` is *irrelevant* if there's another constructor in `split_ctors` that matches + // strictly fewer rows. In that case we can sometimes skip it. See the top of the file for + // details. + let ctor_is_relevant = matches!(ctor, Constructor::Missing) || missing_ctors.is_empty(); + let mut spec_matrix = matrix.specialize_constructor(pcx, &ctor, ctor_is_relevant); let mut witnesses = ensure_sufficient_stack(|| { compute_exhaustiveness_and_usefulness(mcx, &mut spec_matrix, false) }); - let counts_for_exhaustiveness = match ctor { - Constructor::Missing => !missing_ctors.is_empty(), - // If there are missing constructors we'll report those instead. Since `Missing` matches - // only the wildcard rows, it matches fewer rows than this constructor, and is therefore - // guaranteed to result in the same or more witnesses. So skipping this does not - // jeopardize correctness. - _ => missing_ctors.is_empty(), - }; - if counts_for_exhaustiveness { - // Transform witnesses for `spec_matrix` into witnesses for `matrix`. - witnesses.apply_constructor( - pcx, - &missing_ctors, - &ctor, - report_individual_missing_ctors, - ); - // Accumulate the found witnesses. - ret.extend(witnesses); - } + // Transform witnesses for `spec_matrix` into witnesses for `matrix`. + witnesses.apply_constructor(pcx, &missing_ctors, &ctor, report_individual_missing_ctors); + // Accumulate the found witnesses. + ret.extend(witnesses); // A parent row is useful if any of its children is. for child_row in spec_matrix.rows() { |
