diff options
Diffstat (limited to 'compiler/rustc_pattern_analysis/src/constructor.rs')
| -rw-r--r-- | compiler/rustc_pattern_analysis/src/constructor.rs | 1241 |
1 files changed, 1241 insertions, 0 deletions
diff --git a/compiler/rustc_pattern_analysis/src/constructor.rs b/compiler/rustc_pattern_analysis/src/constructor.rs new file mode 100644 index 00000000000..4c12cb3b029 --- /dev/null +++ b/compiler/rustc_pattern_analysis/src/constructor.rs @@ -0,0 +1,1241 @@ +//! As explained in [`super::usefulness`], values and patterns are made from constructors applied to +//! fields. This file defines a `Constructor` enum and various operations to manipulate them. +//! +//! There are two important bits of core logic in this file: constructor inclusion and constructor +//! splitting. Constructor inclusion, i.e. whether a constructor is included in/covered by another, +//! is straightforward and defined in [`Constructor::is_covered_by`]. +//! +//! Constructor splitting is mentioned in [`super::usefulness`] but not detailed. We describe it +//! precisely here. +//! +//! +//! +//! # Constructor grouping and splitting +//! +//! As explained in the corresponding section in [`super::usefulness`], to make usefulness tractable +//! we need to group together constructors that have the same effect when they are used to +//! specialize the matrix. +//! +//! Example: +//! ```compile_fail,E0004 +//! match (0, false) { +//! (0 ..=100, true) => {} +//! (50..=150, false) => {} +//! (0 ..=200, _) => {} +//! } +//! ``` +//! +//! In this example we can restrict specialization to 5 cases: `0..50`, `50..=100`, `101..=150`, +//! `151..=200` and `200..`. +//! +//! In [`super::usefulness`], we had said that `specialize` only takes value-only constructors. We +//! now relax this restriction: we allow `specialize` to take constructors like `0..50` as long as +//! we're careful to only do that with constructors that make sense. For example, `specialize(0..50, +//! (0..=100, true))` is sensible, but `specialize(50..=200, (0..=100, true))` is not. +//! +//! Constructor splitting looks at the constructors in the first column of the matrix and constructs +//! such a sensible set of constructors. Formally, we want to find a smallest disjoint set of +//! constructors: +//! - Whose union covers the whole type, and +//! - That have no non-trivial intersection with any of the constructors in the column (i.e. they're +//! each either disjoint with or covered by any given column constructor). +//! +//! We compute this in two steps: first [`ConstructorSet::for_ty`] determines the set of all +//! possible constructors for the type. Then [`ConstructorSet::split`] looks at the column of +//! constructors and splits the set into groups accordingly. The precise invariants of +//! [`ConstructorSet::split`] is described in [`SplitConstructorSet`]. +//! +//! Constructor splitting has two interesting special cases: integer range splitting (see +//! [`IntRange::split`]) and slice splitting (see [`Slice::split`]). +//! +//! +//! +//! # The `Missing` constructor +//! +//! We detail a special case of constructor splitting that is a bit subtle. Take the following: +//! +//! ``` +//! enum Direction { North, South, East, West } +//! # let wind = (Direction::North, 0u8); +//! match wind { +//! (Direction::North, 50..) => {} +//! (_, _) => {} +//! } +//! ``` +//! +//! Here we expect constructor splitting to output two cases: `North`, and "everything else". This +//! "everything else" is represented by [`Constructor::Missing`]. Unlike other constructors, it's a +//! bit contextual: to know the exact list of constructors it represents we have to look at the +//! column. In practice however we don't need to, because by construction it only matches rows that +//! have wildcards. This is how this constructor is special: the only constructor that covers it is +//! `Wildcard`. +//! +//! The only place where we care about which constructors `Missing` represents is in diagnostics +//! (see `super::usefulness::WitnessMatrix::apply_constructor`). +//! +//! We choose whether to specialize with `Missing` in +//! `super::usefulness::compute_exhaustiveness_and_reachability`. +//! +//! +//! +//! ## Empty types, empty constructors, and the `exhaustive_patterns` feature +//! +//! An empty type is a type that has no valid value, like `!`, `enum Void {}`, or `Result<!, !>`. +//! They require careful handling. +//! +//! First, for soundness reasons related to the possible existence of invalid values, by default we +//! don't treat empty types as empty. We force them to be matched with wildcards. Except if the +//! `exhaustive_patterns` feature is turned on, in which case we do treat them as empty. And also +//! except if the type has no constructors (like `enum Void {}` but not like `Result<!, !>`), we +//! specifically allow `match void {}` to be exhaustive. There are additionally considerations of +//! place validity that are handled in `super::usefulness`. Yes this is a bit tricky. +//! +//! The second thing is that regardless of the above, it is always allowed to use all the +//! constructors of a type. For example, all the following is ok: +//! +//! ```rust,ignore(example) +//! # #![feature(never_type)] +//! # #![feature(exhaustive_patterns)] +//! fn foo(x: Option<!>) { +//! match x { +//! None => {} +//! Some(_) => {} +//! } +//! } +//! fn bar(x: &[!]) -> u32 { +//! match x { +//! [] => 1, +//! [_] => 2, +//! [_, _] => 3, +//! } +//! } +//! ``` +//! +//! Moreover, take the following: +//! +//! ```rust +//! # #![feature(never_type)] +//! # #![feature(exhaustive_patterns)] +//! # let x = None::<!>; +//! match x { +//! None => {} +//! } +//! ``` +//! +//! On a normal type, we would identify `Some` as missing and tell the user. If `x: Option<!>` +//! however (and `exhaustive_patterns` is on), it's ok to omit `Some`. When listing the constructors +//! of a type, we must therefore track which can be omitted. +//! +//! Let's call "empty" a constructor that matches no valid value for the type, like `Some` for the +//! type `Option<!>`. What this all means is that `ConstructorSet` must know which constructors are +//! empty. The difference between empty and nonempty constructors is that empty constructors need +//! not be present for the match to be exhaustive. +//! +//! A final remark: empty constructors of arity 0 break specialization, we must avoid them. The +//! reason is that if we specialize by them, nothing remains to witness the emptiness; the rest of +//! the algorithm can't distinguish them from a nonempty constructor. The only known case where this +//! could happen is the `[..]` pattern on `[!; N]` with `N > 0` so we must take care to not emit it. +//! +//! This is all handled by [`ConstructorSet::for_ty`] and [`ConstructorSet::split`]. The invariants +//! of [`SplitConstructorSet`] are also of interest. +//! +//! +//! +//! ## Opaque patterns +//! +//! Some patterns, such as constants that are not allowed to be matched structurally, cannot be +//! inspected, which we handle with `Constructor::Opaque`. Since we know nothing of these patterns, +//! we assume they never cover each other. In order to respect the invariants of +//! [`SplitConstructorSet`], we give each `Opaque` constructor a unique id so we can recognize it. + +use std::cmp::{self, max, min, Ordering}; +use std::fmt; +use std::iter::once; + +use smallvec::SmallVec; + +use rustc_apfloat::ieee::{DoubleS, IeeeFloat, SingleS}; +use rustc_data_structures::fx::FxHashSet; +use rustc_hir::RangeEnd; +use rustc_index::IndexVec; +use rustc_middle::middle::stability::EvalResult; +use rustc_middle::mir; +use rustc_middle::mir::interpret::Scalar; +use rustc_middle::thir::{Pat, PatKind, PatRange, PatRangeBoundary}; +use rustc_middle::ty::layout::IntegerExt; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_span::DUMMY_SP; +use rustc_target::abi::{Integer, VariantIdx, FIRST_VARIANT}; + +use self::Constructor::*; +use self::MaybeInfiniteInt::*; +use self::SliceKind::*; + +use crate::pat::Fields; +use crate::usefulness::{MatchCheckCtxt, PatCtxt}; + +/// Whether we have seen a constructor in the column or not. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum Presence { + Unseen, + Seen, +} + +/// A possibly infinite integer. Values are encoded such that the ordering on `u128` matches the +/// natural order on the original type. For example, `-128i8` is encoded as `0` and `127i8` as +/// `255`. See `signed_bias` for details. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum MaybeInfiniteInt { + NegInfinity, + /// Encoded value. DO NOT CONSTRUCT BY HAND; use `new_finite`. + Finite(u128), + /// The integer after `u128::MAX`. We need it to represent `x..=u128::MAX` as an exclusive range. + JustAfterMax, + PosInfinity, +} + +impl MaybeInfiniteInt { + // The return value of `signed_bias` should be XORed with a value to encode/decode it. + fn signed_bias(tcx: TyCtxt<'_>, ty: Ty<'_>) -> u128 { + match *ty.kind() { + ty::Int(ity) => { + let bits = Integer::from_int_ty(&tcx, ity).size().bits() as u128; + 1u128 << (bits - 1) + } + _ => 0, + } + } + + fn new_finite(tcx: TyCtxt<'_>, ty: Ty<'_>, bits: u128) -> Self { + let bias = Self::signed_bias(tcx, ty); + // Perform a shift if the underlying types are signed, which makes the interval arithmetic + // type-independent. + let x = bits ^ bias; + Finite(x) + } + pub(crate) fn from_pat_range_bdy<'tcx>( + bdy: PatRangeBoundary<'tcx>, + ty: Ty<'tcx>, + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ) -> Self { + match bdy { + PatRangeBoundary::NegInfinity => NegInfinity, + PatRangeBoundary::Finite(value) => { + let bits = value.eval_bits(tcx, param_env); + Self::new_finite(tcx, ty, bits) + } + PatRangeBoundary::PosInfinity => PosInfinity, + } + } + + /// Used only for diagnostics. + /// Note: it is possible to get `isize/usize::MAX+1` here, as explained in the doc for + /// [`IntRange::split`]. This cannot be represented as a `Const`, so we represent it with + /// `PosInfinity`. + fn to_diagnostic_pat_range_bdy<'tcx>( + self, + ty: Ty<'tcx>, + tcx: TyCtxt<'tcx>, + ) -> PatRangeBoundary<'tcx> { + match self { + NegInfinity => PatRangeBoundary::NegInfinity, + Finite(x) => { + let bias = Self::signed_bias(tcx, ty); + let bits = x ^ bias; + let size = ty.primitive_size(tcx); + match Scalar::try_from_uint(bits, size) { + Some(scalar) => { + let value = mir::Const::from_scalar(tcx, scalar, ty); + PatRangeBoundary::Finite(value) + } + // The value doesn't fit. Since `x >= 0` and 0 always encodes the minimum value + // for a type, the problem isn't that the value is too small. So it must be too + // large. + None => PatRangeBoundary::PosInfinity, + } + } + JustAfterMax | PosInfinity => PatRangeBoundary::PosInfinity, + } + } + + /// Note: this will not turn a finite value into an infinite one or vice-versa. + pub fn minus_one(self) -> Self { + match self { + Finite(n) => match n.checked_sub(1) { + Some(m) => Finite(m), + None => bug!(), + }, + JustAfterMax => Finite(u128::MAX), + x => x, + } + } + /// Note: this will not turn a finite value into an infinite one or vice-versa. + pub fn plus_one(self) -> Self { + match self { + Finite(n) => match n.checked_add(1) { + Some(m) => Finite(m), + None => JustAfterMax, + }, + JustAfterMax => bug!(), + x => x, + } + } +} + +/// An exclusive interval, used for precise integer exhaustiveness checking. `IntRange`s always +/// store a contiguous range. +/// +/// `IntRange` is never used to encode an empty range or a "range" that wraps around the (offset) +/// space: i.e., `range.lo < range.hi`. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct IntRange { + pub(crate) lo: MaybeInfiniteInt, // Must not be `PosInfinity`. + pub(crate) hi: MaybeInfiniteInt, // Must not be `NegInfinity`. +} + +impl IntRange { + #[inline] + pub(super) fn is_integral(ty: Ty<'_>) -> bool { + matches!(ty.kind(), ty::Char | ty::Int(_) | ty::Uint(_)) + } + + /// Best effort; will not know that e.g. `255u8..` is a singleton. + pub fn is_singleton(&self) -> bool { + // Since `lo` and `hi` can't be the same `Infinity` and `plus_one` never changes from finite + // to infinite, this correctly only detects ranges that contain exacly one `Finite(x)`. + self.lo.plus_one() == self.hi + } + + #[inline] + pub fn from_bits<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, bits: u128) -> IntRange { + let x = MaybeInfiniteInt::new_finite(tcx, ty, bits); + IntRange { lo: x, hi: x.plus_one() } + } + + #[inline] + pub fn from_range(lo: MaybeInfiniteInt, mut hi: MaybeInfiniteInt, end: RangeEnd) -> IntRange { + if end == RangeEnd::Included { + hi = hi.plus_one(); + } + if lo >= hi { + // This should have been caught earlier by E0030. + bug!("malformed range pattern: {lo:?}..{hi:?}"); + } + IntRange { lo, hi } + } + + fn is_subrange(&self, other: &Self) -> bool { + other.lo <= self.lo && self.hi <= other.hi + } + + fn intersection(&self, other: &Self) -> Option<Self> { + if self.lo < other.hi && other.lo < self.hi { + Some(IntRange { lo: max(self.lo, other.lo), hi: min(self.hi, other.hi) }) + } else { + None + } + } + + /// Partition a range of integers into disjoint subranges. This does constructor splitting for + /// integer ranges as explained at the top of the file. + /// + /// This returns an output that covers `self`. The output is split so that the only + /// intersections between an output range and a column range are inclusions. No output range + /// straddles the boundary of one of the inputs. + /// + /// Additionally, we track for each output range whether it is covered by one of the column ranges or not. + /// + /// The following input: + /// ```text + /// (--------------------------) // `self` + /// (------) (----------) (-) + /// (------) (--------) + /// ``` + /// is first intersected with `self`: + /// ```text + /// (--------------------------) // `self` + /// (----) (----------) (-) + /// (------) (--------) + /// ``` + /// and then iterated over as follows: + /// ```text + /// (-(--)-(-)-(------)-)--(-)- + /// ``` + /// where each sequence of dashes is an output range, and dashes outside parentheses are marked + /// as `Presence::Missing`. + /// + /// ## `isize`/`usize` + /// + /// Whereas a wildcard of type `i32` stands for the range `i32::MIN..=i32::MAX`, a `usize` + /// wildcard stands for `0..PosInfinity` and a `isize` wildcard stands for + /// `NegInfinity..PosInfinity`. In other words, as far as `IntRange` is concerned, there are + /// values before `isize::MIN` and after `usize::MAX`/`isize::MAX`. + /// This is to avoid e.g. `0..(u32::MAX as usize)` from being exhaustive on one architecture and + /// not others. This was decided in <https://github.com/rust-lang/rfcs/pull/2591>. + /// + /// These infinities affect splitting subtly: it is possible to get `NegInfinity..0` and + /// `usize::MAX+1..PosInfinity` in the output. Diagnostics must be careful to handle these + /// fictitious ranges sensibly. + fn split( + &self, + column_ranges: impl Iterator<Item = IntRange>, + ) -> impl Iterator<Item = (Presence, IntRange)> { + // The boundaries of ranges in `column_ranges` intersected with `self`. + // We do parenthesis matching for input ranges. A boundary counts as +1 if it starts + // a range and -1 if it ends it. When the count is > 0 between two boundaries, we + // are within an input range. + let mut boundaries: Vec<(MaybeInfiniteInt, isize)> = column_ranges + .filter_map(|r| self.intersection(&r)) + .flat_map(|r| [(r.lo, 1), (r.hi, -1)]) + .collect(); + // We sort by boundary, and for each boundary we sort the "closing parentheses" first. The + // order of +1/-1 for a same boundary value is actually irrelevant, because we only look at + // the accumulated count between distinct boundary values. + boundaries.sort_unstable(); + + // Accumulate parenthesis counts. + let mut paren_counter = 0isize; + // Gather pairs of adjacent boundaries. + let mut prev_bdy = self.lo; + boundaries + .into_iter() + // End with the end of the range. The count is ignored. + .chain(once((self.hi, 0))) + // List pairs of adjacent boundaries and the count between them. + .map(move |(bdy, delta)| { + // `delta` affects the count as we cross `bdy`, so the relevant count between + // `prev_bdy` and `bdy` is untouched by `delta`. + let ret = (prev_bdy, paren_counter, bdy); + prev_bdy = bdy; + paren_counter += delta; + ret + }) + // Skip empty ranges. + .filter(|&(prev_bdy, _, bdy)| prev_bdy != bdy) + // Convert back to ranges. + .map(move |(prev_bdy, paren_count, bdy)| { + use Presence::*; + let presence = if paren_count > 0 { Seen } else { Unseen }; + let range = IntRange { lo: prev_bdy, hi: bdy }; + (presence, range) + }) + } + + /// Whether the range denotes the fictitious values before `isize::MIN` or after + /// `usize::MAX`/`isize::MAX` (see doc of [`IntRange::split`] for why these exist). + pub fn is_beyond_boundaries<'tcx>(&self, ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> bool { + ty.is_ptr_sized_integral() && { + // The two invalid ranges are `NegInfinity..isize::MIN` (represented as + // `NegInfinity..0`), and `{u,i}size::MAX+1..PosInfinity`. `to_diagnostic_pat_range_bdy` + // converts `MAX+1` to `PosInfinity`, and we couldn't have `PosInfinity` in `self.lo` + // otherwise. + let lo = self.lo.to_diagnostic_pat_range_bdy(ty, tcx); + matches!(lo, PatRangeBoundary::PosInfinity) + || matches!(self.hi, MaybeInfiniteInt::Finite(0)) + } + } + /// Only used for displaying the range. + pub(super) fn to_diagnostic_pat<'tcx>(&self, ty: Ty<'tcx>, tcx: TyCtxt<'tcx>) -> Pat<'tcx> { + let kind = if matches!((self.lo, self.hi), (NegInfinity, PosInfinity)) { + PatKind::Wild + } else if self.is_singleton() { + let lo = self.lo.to_diagnostic_pat_range_bdy(ty, tcx); + let value = lo.as_finite().unwrap(); + PatKind::Constant { value } + } else { + // We convert to an inclusive range for diagnostics. + let mut end = RangeEnd::Included; + let mut lo = self.lo.to_diagnostic_pat_range_bdy(ty, tcx); + if matches!(lo, PatRangeBoundary::PosInfinity) { + // The only reason to get `PosInfinity` here is the special case where + // `to_diagnostic_pat_range_bdy` found `{u,i}size::MAX+1`. So the range denotes the + // fictitious values after `{u,i}size::MAX` (see [`IntRange::split`] for why we do + // this). We show this to the user as `usize::MAX..` which is slightly incorrect but + // probably clear enough. + let c = ty.numeric_max_val(tcx).unwrap(); + let value = mir::Const::from_ty_const(c, tcx); + lo = PatRangeBoundary::Finite(value); + } + let hi = if matches!(self.hi, MaybeInfiniteInt::Finite(0)) { + // The range encodes `..ty::MIN`, so we can't convert it to an inclusive range. + end = RangeEnd::Excluded; + self.hi + } else { + self.hi.minus_one() + }; + let hi = hi.to_diagnostic_pat_range_bdy(ty, tcx); + PatKind::Range(Box::new(PatRange { lo, hi, end, ty })) + }; + + Pat { ty, span: DUMMY_SP, kind } + } +} + +/// Note: this will render signed ranges incorrectly. To render properly, convert to a pattern +/// first. +impl fmt::Debug for IntRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Finite(lo) = self.lo { + write!(f, "{lo}")?; + } + write!(f, "{}", RangeEnd::Excluded)?; + if let Finite(hi) = self.hi { + write!(f, "{hi}")?; + } + Ok(()) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SliceKind { + /// Patterns of length `n` (`[x, y]`). + FixedLen(usize), + /// Patterns using the `..` notation (`[x, .., y]`). + /// Captures any array constructor of `length >= i + j`. + /// In the case where `array_len` is `Some(_)`, + /// this indicates that we only care about the first `i` and the last `j` values of the array, + /// and everything in between is a wildcard `_`. + VarLen(usize, usize), +} + +impl SliceKind { + fn arity(self) -> usize { + match self { + FixedLen(length) => length, + VarLen(prefix, suffix) => prefix + suffix, + } + } + + /// Whether this pattern includes patterns of length `other_len`. + fn covers_length(self, other_len: usize) -> bool { + match self { + FixedLen(len) => len == other_len, + VarLen(prefix, suffix) => prefix + suffix <= other_len, + } + } +} + +/// A constructor for array and slice patterns. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct Slice { + /// `None` if the matched value is a slice, `Some(n)` if it is an array of size `n`. + pub(crate) array_len: Option<usize>, + /// The kind of pattern it is: fixed-length `[x, y]` or variable length `[x, .., y]`. + pub(crate) kind: SliceKind, +} + +impl Slice { + pub fn new(array_len: Option<usize>, kind: SliceKind) -> Self { + let kind = match (array_len, kind) { + // If the middle `..` has length 0, we effectively have a fixed-length pattern. + (Some(len), VarLen(prefix, suffix)) if prefix + suffix == len => FixedLen(len), + (Some(len), VarLen(prefix, suffix)) if prefix + suffix > len => bug!( + "Slice pattern of length {} longer than its array length {len}", + prefix + suffix + ), + _ => kind, + }; + Slice { array_len, kind } + } + + pub(crate) fn arity(self) -> usize { + self.kind.arity() + } + + /// See `Constructor::is_covered_by` + fn is_covered_by(self, other: Self) -> bool { + other.kind.covers_length(self.arity()) + } + + /// This computes constructor splitting for variable-length slices, as explained at the top of + /// the file. + /// + /// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, + /// _, _, y] | etc`. The corresponding value constructors are fixed-length array constructors of + /// corresponding lengths. We obviously can't list this infinitude of constructors. + /// Thankfully, it turns out that for each finite set of slice patterns, all sufficiently large + /// array lengths are equivalent. + /// + /// Let's look at an example, where we are trying to split the last pattern: + /// ``` + /// # fn foo(x: &[bool]) { + /// match x { + /// [true, true, ..] => {} + /// [.., false, false] => {} + /// [..] => {} + /// } + /// # } + /// ``` + /// Here are the results of specialization for the first few lengths: + /// ``` + /// # fn foo(x: &[bool]) { match x { + /// // length 0 + /// [] => {} + /// // length 1 + /// [_] => {} + /// // length 2 + /// [true, true] => {} + /// [false, false] => {} + /// [_, _] => {} + /// // length 3 + /// [true, true, _ ] => {} + /// [_, false, false] => {} + /// [_, _, _ ] => {} + /// // length 4 + /// [true, true, _, _ ] => {} + /// [_, _, false, false] => {} + /// [_, _, _, _ ] => {} + /// // length 5 + /// [true, true, _, _, _ ] => {} + /// [_, _, _, false, false] => {} + /// [_, _, _, _, _ ] => {} + /// # _ => {} + /// # }} + /// ``` + /// + /// We see that above length 4, we are simply inserting columns full of wildcards in the middle. + /// This means that specialization and witness computation with slices of length `l >= 4` will + /// give equivalent results regardless of `l`. This applies to any set of slice patterns: there + /// will be a length `L` above which all lengths behave the same. This is exactly what we need + /// for constructor splitting. + /// + /// A variable-length slice pattern covers all lengths from its arity up to infinity. As we just + /// saw, we can split this in two: lengths below `L` are treated individually with a + /// fixed-length slice each; lengths above `L` are grouped into a single variable-length slice + /// constructor. + /// + /// For each variable-length slice pattern `p` with a prefix of length `plₚ` and suffix of + /// length `slₚ`, only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as + /// long as `L` is positive (to avoid concerns about empty types), all elements after the + /// maximum prefix length and before the maximum suffix length are not examined by any + /// variable-length pattern, and therefore can be ignored. This gives us a way to compute `L`. + /// + /// Additionally, if fixed-length patterns exist, we must pick an `L` large enough to miss them, + /// so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))`. + /// `max_slice` below will be made to have this arity `L`. + /// + /// If `self` is fixed-length, it is returned as-is. + /// + /// Additionally, we track for each output slice whether it is covered by one of the column slices or not. + fn split( + self, + column_slices: impl Iterator<Item = Slice>, + ) -> impl Iterator<Item = (Presence, Slice)> { + // Range of lengths below `L`. + let smaller_lengths; + let arity = self.arity(); + let mut max_slice = self.kind; + // Tracks the smallest variable-length slice we've seen. Any slice arity above it is + // therefore `Presence::Seen` in the column. + let mut min_var_len = usize::MAX; + // Tracks the fixed-length slices we've seen, to mark them as `Presence::Seen`. + let mut seen_fixed_lens = FxHashSet::default(); + match &mut max_slice { + VarLen(max_prefix_len, max_suffix_len) => { + // A length larger than any fixed-length slice encountered. + // We start at 1 in case the subtype is empty because in that case the zero-length + // slice must be treated separately from the rest. + let mut fixed_len_upper_bound = 1; + // We grow `max_slice` to be larger than all slices encountered, as described above. + // `L` is `max_slice.arity()`. For diagnostics, we keep the prefix and suffix + // lengths separate. + for slice in column_slices { + match slice.kind { + FixedLen(len) => { + fixed_len_upper_bound = cmp::max(fixed_len_upper_bound, len + 1); + seen_fixed_lens.insert(len); + } + VarLen(prefix, suffix) => { + *max_prefix_len = cmp::max(*max_prefix_len, prefix); + *max_suffix_len = cmp::max(*max_suffix_len, suffix); + min_var_len = cmp::min(min_var_len, prefix + suffix); + } + } + } + // If `fixed_len_upper_bound >= L`, we set `L` to `fixed_len_upper_bound`. + if let Some(delta) = + fixed_len_upper_bound.checked_sub(*max_prefix_len + *max_suffix_len) + { + *max_prefix_len += delta + } + + // We cap the arity of `max_slice` at the array size. + match self.array_len { + Some(len) if max_slice.arity() >= len => max_slice = FixedLen(len), + _ => {} + } + + smaller_lengths = match self.array_len { + // The only admissible fixed-length slice is one of the array size. Whether `max_slice` + // is fixed-length or variable-length, it will be the only relevant slice to output + // here. + Some(_) => 0..0, // empty range + // We need to cover all arities in the range `(arity..infinity)`. We split that + // range into two: lengths smaller than `max_slice.arity()` are treated + // independently as fixed-lengths slices, and lengths above are captured by + // `max_slice`. + None => self.arity()..max_slice.arity(), + }; + } + FixedLen(_) => { + // No need to split here. We only track presence. + for slice in column_slices { + match slice.kind { + FixedLen(len) => { + if len == arity { + seen_fixed_lens.insert(len); + } + } + VarLen(prefix, suffix) => { + min_var_len = cmp::min(min_var_len, prefix + suffix); + } + } + } + smaller_lengths = 0..0; + } + }; + + smaller_lengths.map(FixedLen).chain(once(max_slice)).map(move |kind| { + let arity = kind.arity(); + let seen = if min_var_len <= arity || seen_fixed_lens.contains(&arity) { + Presence::Seen + } else { + Presence::Unseen + }; + (seen, Slice::new(self.array_len, kind)) + }) + } +} + +/// A globally unique id to distinguish `Opaque` patterns. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct OpaqueId(u32); + +impl OpaqueId { + pub fn new() -> Self { + use std::sync::atomic::{AtomicU32, Ordering}; + static OPAQUE_ID: AtomicU32 = AtomicU32::new(0); + OpaqueId(OPAQUE_ID.fetch_add(1, Ordering::SeqCst)) + } +} + +/// A value can be decomposed into a constructor applied to some fields. This struct represents +/// the constructor. See also `Fields`. +/// +/// `pat_constructor` retrieves the constructor corresponding to a pattern. +/// `specialize_constructor` returns the list of fields corresponding to a pattern, given a +/// constructor. `Constructor::apply` reconstructs the pattern from a pair of `Constructor` and +/// `Fields`. +#[derive(Clone, Debug, PartialEq)] +pub enum Constructor<'tcx> { + /// The constructor for patterns that have a single constructor, like tuples, struct patterns, + /// and references. Fixed-length arrays are treated separately with `Slice`. + Single, + /// Enum variants. + Variant(VariantIdx), + /// Booleans + Bool(bool), + /// Ranges of integer literal values (`2`, `2..=5` or `2..5`). + IntRange(IntRange), + /// Ranges of floating-point literal values (`2.0..=5.2`). + F32Range(IeeeFloat<SingleS>, IeeeFloat<SingleS>, RangeEnd), + F64Range(IeeeFloat<DoubleS>, IeeeFloat<DoubleS>, RangeEnd), + /// String literals. Strings are not quite the same as `&[u8]` so we treat them separately. + Str(mir::Const<'tcx>), + /// Array and slice patterns. + Slice(Slice), + /// Constants that must not be matched structurally. They are treated as black boxes for the + /// purposes of exhaustiveness: we must not inspect them, and they don't count towards making a + /// match exhaustive. + /// Carries an id that must be unique within a match. We need this to ensure the invariants of + /// [`SplitConstructorSet`]. + Opaque(OpaqueId), + /// Or-pattern. + Or, + /// Wildcard pattern. + Wildcard, + /// Fake extra constructor for enums that aren't allowed to be matched exhaustively. Also used + /// for those types for which we cannot list constructors explicitly, like `f64` and `str`. + NonExhaustive, + /// Fake extra constructor for variants that should not be mentioned in diagnostics. + /// We use this for variants behind an unstable gate as well as + /// `#[doc(hidden)]` ones. + Hidden, + /// Fake extra constructor for constructors that are not seen in the matrix, as explained at the + /// top of the file. + Missing, +} + +impl<'tcx> Constructor<'tcx> { + pub(super) fn is_non_exhaustive(&self) -> bool { + matches!(self, NonExhaustive) + } + + pub(super) fn as_variant(&self) -> Option<VariantIdx> { + match self { + Variant(i) => Some(*i), + _ => None, + } + } + fn as_bool(&self) -> Option<bool> { + match self { + Bool(b) => Some(*b), + _ => None, + } + } + pub(super) fn as_int_range(&self) -> Option<&IntRange> { + match self { + IntRange(range) => Some(range), + _ => None, + } + } + fn as_slice(&self) -> Option<Slice> { + match self { + Slice(slice) => Some(*slice), + _ => None, + } + } + + pub(crate) fn variant_index_for_adt(&self, adt: ty::AdtDef<'tcx>) -> VariantIdx { + match *self { + Variant(idx) => idx, + Single => { + assert!(!adt.is_enum()); + FIRST_VARIANT + } + _ => bug!("bad constructor {:?} for adt {:?}", self, adt), + } + } + + /// The number of fields for this constructor. This must be kept in sync with + /// `Fields::wildcards`. + pub(crate) fn arity(&self, pcx: &PatCtxt<'_, '_, 'tcx>) -> usize { + match self { + Single | Variant(_) => match pcx.ty.kind() { + ty::Tuple(fs) => fs.len(), + ty::Ref(..) => 1, + ty::Adt(adt, ..) => { + if adt.is_box() { + // The only legal patterns of type `Box` (outside `std`) are `_` and box + // patterns. If we're here we can assume this is a box pattern. + 1 + } else { + let variant = &adt.variant(self.variant_index_for_adt(*adt)); + Fields::list_variant_nonhidden_fields(pcx.cx, pcx.ty, variant).count() + } + } + _ => bug!("Unexpected type for `Single` constructor: {:?}", pcx.ty), + }, + Slice(slice) => slice.arity(), + Bool(..) + | IntRange(..) + | F32Range(..) + | F64Range(..) + | Str(..) + | Opaque(..) + | NonExhaustive + | Hidden + | Missing { .. } + | Wildcard => 0, + Or => bug!("The `Or` constructor doesn't have a fixed arity"), + } + } + + /// Returns whether `self` is covered by `other`, i.e. whether `self` is a subset of `other`. + /// For the simple cases, this is simply checking for equality. For the "grouped" constructors, + /// this checks for inclusion. + // We inline because this has a single call site in `Matrix::specialize_constructor`. + #[inline] + pub(crate) fn is_covered_by<'p>(&self, pcx: &PatCtxt<'_, 'p, 'tcx>, other: &Self) -> bool { + match (self, other) { + (Wildcard, _) => { + span_bug!( + pcx.cx.scrut_span, + "Constructor splitting should not have returned `Wildcard`" + ) + } + // Wildcards cover anything + (_, Wildcard) => true, + // Only a wildcard pattern can match these special constructors. + (Missing { .. } | NonExhaustive | Hidden, _) => false, + + (Single, Single) => true, + (Variant(self_id), Variant(other_id)) => self_id == other_id, + (Bool(self_b), Bool(other_b)) => self_b == other_b, + + (IntRange(self_range), IntRange(other_range)) => self_range.is_subrange(other_range), + (F32Range(self_from, self_to, self_end), F32Range(other_from, other_to, other_end)) => { + self_from.ge(other_from) + && match self_to.partial_cmp(other_to) { + Some(Ordering::Less) => true, + Some(Ordering::Equal) => other_end == self_end, + _ => false, + } + } + (F64Range(self_from, self_to, self_end), F64Range(other_from, other_to, other_end)) => { + self_from.ge(other_from) + && match self_to.partial_cmp(other_to) { + Some(Ordering::Less) => true, + Some(Ordering::Equal) => other_end == self_end, + _ => false, + } + } + (Str(self_val), Str(other_val)) => { + // FIXME Once valtrees are available we can directly use the bytes + // in the `Str` variant of the valtree for the comparison here. + self_val == other_val + } + (Slice(self_slice), Slice(other_slice)) => self_slice.is_covered_by(*other_slice), + + // Opaque constructors don't interact with anything unless they come from the + // syntactically identical pattern. + (Opaque(self_id), Opaque(other_id)) => self_id == other_id, + (Opaque(..), _) | (_, Opaque(..)) => false, + + _ => span_bug!( + pcx.cx.scrut_span, + "trying to compare incompatible constructors {:?} and {:?}", + self, + other + ), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum VariantVisibility { + /// Variant that doesn't fit the other cases, i.e. most variants. + Visible, + /// Variant behind an unstable gate or with the `#[doc(hidden)]` attribute. It will not be + /// mentioned in diagnostics unless the user mentioned it first. + Hidden, + /// Variant that matches no value. E.g. `Some::<Option<!>>` if the `exhaustive_patterns` feature + /// is enabled. Like `Hidden`, it will not be mentioned in diagnostics unless the user mentioned + /// it first. + Empty, +} + +/// Describes the set of all constructors for a type. For details, in particular about the emptiness +/// of constructors, see the top of the file. +/// +/// In terms of division of responsibility, [`ConstructorSet::split`] handles all of the +/// `exhaustive_patterns` feature. +#[derive(Debug)] +pub enum ConstructorSet { + /// The type has a single constructor, e.g. `&T` or a struct. `empty` tracks whether the + /// constructor is empty. + Single { empty: bool }, + /// This type has the following list of constructors. If `variants` is empty and + /// `non_exhaustive` is false, don't use this; use `NoConstructors` instead. + Variants { variants: IndexVec<VariantIdx, VariantVisibility>, non_exhaustive: bool }, + /// Booleans. + Bool, + /// The type is spanned by integer values. The range or ranges give the set of allowed values. + /// The second range is only useful for `char`. + Integers { range_1: IntRange, range_2: Option<IntRange> }, + /// The type is matched by slices. `array_len` is the compile-time length of the array, if + /// known. If `subtype_is_empty`, all constructors are empty except possibly the zero-length + /// slice `[]`. + Slice { array_len: Option<usize>, subtype_is_empty: bool }, + /// The constructors cannot be listed, and the type cannot be matched exhaustively. E.g. `str`, + /// floats. + Unlistable, + /// The type has no constructors (not even empty ones). This is `!` and empty enums. + NoConstructors, +} + +/// Describes the result of analyzing the constructors in a column of a match. +/// +/// `present` is morally the set of constructors present in the column, and `missing` is the set of +/// constructors that exist in the type but are not present in the column. +/// +/// More formally, if we discard wildcards from the column, this respects the following constraints: +/// 1. the union of `present`, `missing` and `missing_empty` covers all the constructors of the type +/// 2. each constructor in `present` is covered by something in the column +/// 3. no constructor in `missing` or `missing_empty` is covered by anything in the column +/// 4. each constructor in the column is equal to the union of one or more constructors in `present` +/// 5. `missing` does not contain empty constructors (see discussion about emptiness at the top of +/// the file); +/// 6. `missing_empty` contains only empty constructors +/// 7. constructors in `present`, `missing` and `missing_empty` are split for the column; in other +/// words, they are either fully included in or fully disjoint from each constructor in the +/// column. In yet other words, there are no non-trivial intersections like between `0..10` and +/// `5..15`. +/// +/// We must be particularly careful with weird constructors like `Opaque`: they're not formally part +/// of the `ConstructorSet` for the type, yet if we forgot to include them in `present` we would be +/// ignoring any row with `Opaque`s in the algorithm. Hence the importance of point 4. +#[derive(Debug)] +pub(super) struct SplitConstructorSet<'tcx> { + pub(super) present: SmallVec<[Constructor<'tcx>; 1]>, + pub(super) missing: Vec<Constructor<'tcx>>, + pub(super) missing_empty: Vec<Constructor<'tcx>>, +} + +impl ConstructorSet { + /// Creates a set that represents all the constructors of `ty`. + /// + /// See at the top of the file for considerations of emptiness. + #[instrument(level = "debug", skip(cx), ret)] + pub fn for_ty<'p, 'tcx>(cx: &MatchCheckCtxt<'p, 'tcx>, ty: Ty<'tcx>) -> Self { + let make_range = |start, end| { + IntRange::from_range( + MaybeInfiniteInt::new_finite(cx.tcx, ty, start), + MaybeInfiniteInt::new_finite(cx.tcx, ty, end), + RangeEnd::Included, + ) + }; + // This determines the set of all possible constructors for the type `ty`. For numbers, + // arrays and slices we use ranges and variable-length slices when appropriate. + match ty.kind() { + ty::Bool => Self::Bool, + ty::Char => { + // The valid Unicode Scalar Value ranges. + Self::Integers { + range_1: make_range('\u{0000}' as u128, '\u{D7FF}' as u128), + range_2: Some(make_range('\u{E000}' as u128, '\u{10FFFF}' as u128)), + } + } + &ty::Int(ity) => { + let range = if ty.is_ptr_sized_integral() { + // The min/max values of `isize` are not allowed to be observed. + IntRange { lo: NegInfinity, hi: PosInfinity } + } else { + let bits = Integer::from_int_ty(&cx.tcx, ity).size().bits() as u128; + let min = 1u128 << (bits - 1); + let max = min - 1; + make_range(min, max) + }; + Self::Integers { range_1: range, range_2: None } + } + &ty::Uint(uty) => { + let range = if ty.is_ptr_sized_integral() { + // The max value of `usize` is not allowed to be observed. + let lo = MaybeInfiniteInt::new_finite(cx.tcx, ty, 0); + IntRange { lo, hi: PosInfinity } + } else { + let size = Integer::from_uint_ty(&cx.tcx, uty).size(); + let max = size.truncate(u128::MAX); + make_range(0, max) + }; + Self::Integers { range_1: range, range_2: None } + } + ty::Slice(sub_ty) => { + Self::Slice { array_len: None, subtype_is_empty: cx.is_uninhabited(*sub_ty) } + } + ty::Array(sub_ty, len) => { + // We treat arrays of a constant but unknown length like slices. + Self::Slice { + array_len: len.try_eval_target_usize(cx.tcx, cx.param_env).map(|l| l as usize), + subtype_is_empty: cx.is_uninhabited(*sub_ty), + } + } + ty::Adt(def, args) if def.is_enum() => { + let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(ty); + if def.variants().is_empty() && !is_declared_nonexhaustive { + Self::NoConstructors + } else { + let mut variants = + IndexVec::from_elem(VariantVisibility::Visible, def.variants()); + for (idx, v) in def.variants().iter_enumerated() { + let variant_def_id = def.variant(idx).def_id; + // Visibly uninhabited variants. + let is_inhabited = v + .inhabited_predicate(cx.tcx, *def) + .instantiate(cx.tcx, args) + .apply(cx.tcx, cx.param_env, cx.module); + // Variants that depend on a disabled unstable feature. + let is_unstable = matches!( + cx.tcx.eval_stability(variant_def_id, None, DUMMY_SP, None), + EvalResult::Deny { .. } + ); + // Foreign `#[doc(hidden)]` variants. + let is_doc_hidden = + cx.tcx.is_doc_hidden(variant_def_id) && !variant_def_id.is_local(); + let visibility = if !is_inhabited { + // FIXME: handle empty+hidden + VariantVisibility::Empty + } else if is_unstable || is_doc_hidden { + VariantVisibility::Hidden + } else { + VariantVisibility::Visible + }; + variants[idx] = visibility; + } + + Self::Variants { variants, non_exhaustive: is_declared_nonexhaustive } + } + } + ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => { + Self::Single { empty: cx.is_uninhabited(ty) } + } + ty::Never => Self::NoConstructors, + // This type is one for which we cannot list constructors, like `str` or `f64`. + // FIXME(Nadrieril): which of these are actually allowed? + ty::Float(_) + | ty::Str + | ty::Foreign(_) + | ty::RawPtr(_) + | ty::FnDef(_, _) + | ty::FnPtr(_) + | ty::Dynamic(_, _, _) + | ty::Closure(_, _) + | ty::Coroutine(_, _, _) + | ty::Alias(_, _) + | ty::Param(_) + | ty::Error(_) => Self::Unlistable, + ty::CoroutineWitness(_, _) | ty::Bound(_, _) | ty::Placeholder(_) | ty::Infer(_) => { + bug!("Encountered unexpected type in `ConstructorSet::for_ty`: {ty:?}") + } + } + } + + /// This analyzes a column of constructors to 1/ determine which constructors of the type (if + /// any) are missing; 2/ split constructors to handle non-trivial intersections e.g. on ranges + /// or slices. This can get subtle; see [`SplitConstructorSet`] for details of this operation + /// and its invariants. + #[instrument(level = "debug", skip(self, pcx, ctors), ret)] + pub(super) fn split<'a, 'tcx>( + &self, + pcx: &PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator<Item = &'a Constructor<'tcx>> + Clone, + ) -> SplitConstructorSet<'tcx> + where + 'tcx: 'a, + { + let mut present: SmallVec<[_; 1]> = SmallVec::new(); + // Empty constructors found missing. + let mut missing_empty = Vec::new(); + // Nonempty constructors found missing. + let mut missing = Vec::new(); + // Constructors in `ctors`, except wildcards and opaques. + let mut seen = Vec::new(); + for ctor in ctors.cloned() { + match ctor { + Opaque(..) => present.push(ctor), + Wildcard => {} // discard wildcards + _ => seen.push(ctor), + } + } + + match self { + ConstructorSet::Single { empty } => { + if !seen.is_empty() { + present.push(Single); + } else if *empty { + missing_empty.push(Single); + } else { + missing.push(Single); + } + } + ConstructorSet::Variants { variants, non_exhaustive } => { + let seen_set: FxHashSet<_> = seen.iter().map(|c| c.as_variant().unwrap()).collect(); + let mut skipped_a_hidden_variant = false; + + for (idx, visibility) in variants.iter_enumerated() { + let ctor = Variant(idx); + if seen_set.contains(&idx) { + present.push(ctor); + } else { + // We only put visible variants directly into `missing`. + match visibility { + VariantVisibility::Visible => missing.push(ctor), + VariantVisibility::Hidden => skipped_a_hidden_variant = true, + VariantVisibility::Empty => missing_empty.push(ctor), + } + } + } + + if skipped_a_hidden_variant { + missing.push(Hidden); + } + if *non_exhaustive { + missing.push(NonExhaustive); + } + } + ConstructorSet::Bool => { + let mut seen_false = false; + let mut seen_true = false; + for b in seen.iter().map(|ctor| ctor.as_bool().unwrap()) { + if b { + seen_true = true; + } else { + seen_false = true; + } + } + if seen_false { + present.push(Bool(false)); + } else { + missing.push(Bool(false)); + } + if seen_true { + present.push(Bool(true)); + } else { + missing.push(Bool(true)); + } + } + ConstructorSet::Integers { range_1, range_2 } => { + let seen_ranges: Vec<_> = + seen.iter().map(|ctor| ctor.as_int_range().unwrap().clone()).collect(); + for (seen, splitted_range) in range_1.split(seen_ranges.iter().cloned()) { + match seen { + Presence::Unseen => missing.push(IntRange(splitted_range)), + Presence::Seen => present.push(IntRange(splitted_range)), + } + } + if let Some(range_2) = range_2 { + for (seen, splitted_range) in range_2.split(seen_ranges.into_iter()) { + match seen { + Presence::Unseen => missing.push(IntRange(splitted_range)), + Presence::Seen => present.push(IntRange(splitted_range)), + } + } + } + } + ConstructorSet::Slice { array_len, subtype_is_empty } => { + let seen_slices = seen.iter().map(|c| c.as_slice().unwrap()); + let base_slice = Slice::new(*array_len, VarLen(0, 0)); + for (seen, splitted_slice) in base_slice.split(seen_slices) { + let ctor = Slice(splitted_slice); + match seen { + Presence::Seen => present.push(ctor), + Presence::Unseen => { + if *subtype_is_empty && splitted_slice.arity() != 0 { + // We have subpatterns of an empty type, so the constructor is + // empty. + missing_empty.push(ctor); + } else { + missing.push(ctor); + } + } + } + } + } + ConstructorSet::Unlistable => { + // Since we can't list constructors, we take the ones in the column. This might list + // some constructors several times but there's not much we can do. + present.extend(seen); + missing.push(NonExhaustive); + } + ConstructorSet::NoConstructors => { + // In a `MaybeInvalid` place even an empty pattern may be reachable. We therefore + // add a dummy empty constructor here, which will be ignored if the place is + // `ValidOnly`. + missing_empty.push(NonExhaustive); + } + } + + // We have now grouped all the constructors into 3 buckets: present, missing, missing_empty. + // In the absence of the `exhaustive_patterns` feature however, we don't count nested empty + // types as empty. Only non-nested `!` or `enum Foo {}` are considered empty. + if !pcx.cx.tcx.features().exhaustive_patterns + && !(pcx.is_top_level && matches!(self, Self::NoConstructors)) + { + // Treat all missing constructors as nonempty. + missing.extend(missing_empty.drain(..)); + } + + SplitConstructorSet { present, missing, missing_empty } + } +} |
