Diffstat (limited to 'compiler')
39 files changed, 1813 insertions, 1221 deletions
diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs index e97c8cc4562..5d994c90379 100644 --- a/compiler/rustc_ast/src/util/comments.rs +++ b/compiler/rustc_ast/src/util/comments.rs @@ -25,9 +25,8 @@ pub struct Comment { /// Makes a doc string more presentable to users. /// Used by rustdoc and perhaps other tools, but not by rustc. -pub fn beautify_doc_string(data: Symbol) -> String { - /// remove whitespace-only lines from the start/end of lines - fn vertical_trim(lines: Vec<String>) -> Vec<String> { +pub fn beautify_doc_string(data: Symbol) -> Symbol { + fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> { let mut i = 0; let mut j = lines.len(); // first line of all-stars should be omitted @@ -47,55 +46,58 @@ pub fn beautify_doc_string(data: Symbol) -> String { j -= 1; } - lines[i..j].to_vec() + if i != 0 || j != lines.len() { Some((i, j)) } else { None } } - /// remove a "[ \t]*\*" block from each line, if possible - fn horizontal_trim(lines: Vec<String>) -> Vec<String> { + fn get_horizontal_trim(lines: &[&str]) -> Option<usize> { let mut i = usize::MAX; - let mut can_trim = true; let mut first = true; - for line in &lines { + for line in lines { for (j, c) in line.chars().enumerate() { if j > i || !"* \t".contains(c) { - can_trim = false; - break; + return None; } if c == '*' { if first { i = j; first = false; } else if i != j { - can_trim = false; + return None; } break; } } if i >= line.len() { - can_trim = false; - } - if !can_trim { - break; + return None; } } + Some(i) + } - if can_trim { - lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect() + let data_s = data.as_str(); + if data_s.contains('\n') { + let mut lines = data_s.lines().collect::<Vec<&str>>(); + let mut changes = false; + let lines = if let Some((i, j)) = get_vertical_trim(&lines) { + changes = true; + // remove whitespace-only lines from the start/end of lines + &mut lines[i..j] } else { - lines + &mut lines + }; + if let Some(horizontal) = get_horizontal_trim(&lines) { + changes = true; + // remove a "[ \t]*\*" block from each line, if possible + for line in lines.iter_mut() { + *line = &line[horizontal + 1..]; + } + } + if changes { + return Symbol::intern(&lines.join("\n")); } } - - let data = data.as_str(); - if data.contains('\n') { - let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>(); - let lines = vertical_trim(lines); - let lines = horizontal_trim(lines); - lines.join("\n") - } else { - data.to_string() - } + data } /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. 
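The refactored `beautify_doc_string` only interns a fresh `Symbol` when one of the trims actually fired; otherwise it hands back the input symbol unchanged. To make the control flow concrete, here is a minimal standalone sketch of the same pass, assuming plain `&str`/`String` in place of `Symbol` so it runs without the compiler's interning machinery (`vertical_trim`, `horizontal_trim`, and `beautify` are illustrative names, not rustc's API):

```
// Standalone sketch of the trimming pass above, with plain strings standing
// in for `Symbol`; mirrors `get_vertical_trim`/`get_horizontal_trim`.

/// Returns the subrange of lines left after dropping whitespace-only lines
/// (and leading/trailing all-stars lines) from both ends, if anything changed.
fn vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
    let mut i = 0;
    let mut j = lines.len();
    // A first or last line consisting only of '*' is block-comment art: drop it.
    if i < j && lines[i].chars().all(|c| c == '*') {
        i += 1;
    }
    while i < j && lines[i].trim().is_empty() {
        i += 1;
    }
    if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') {
        j -= 1;
    }
    while j > i && lines[j - 1].trim().is_empty() {
        j -= 1;
    }
    if i != 0 || j != lines.len() { Some((i, j)) } else { None }
}

/// If every line starts with `[ \t]*\*` with the `*` in one fixed column,
/// returns that column so the caller can strip the `*` prefix from each line.
fn horizontal_trim(lines: &[&str]) -> Option<usize> {
    let mut i = usize::MAX;
    let mut first = true;
    for line in lines {
        for (j, c) in line.chars().enumerate() {
            if j > i || !"* \t".contains(c) {
                return None;
            }
            if c == '*' {
                if first {
                    i = j;
                    first = false;
                } else if i != j {
                    return None;
                }
                break;
            }
        }
        if i >= line.len() {
            return None;
        }
    }
    Some(i)
}

fn beautify(data: &str) -> String {
    if !data.contains('\n') {
        return data.to_string(); // single-line doc strings pass through untouched
    }
    let mut lines: Vec<&str> = data.lines().collect();
    if let Some((i, j)) = vertical_trim(&lines) {
        lines = lines[i..j].to_vec();
    }
    if let Some(col) = horizontal_trim(&lines) {
        for line in lines.iter_mut() {
            *line = &line[col + 1..];
        }
    }
    lines.join("\n")
}

fn main() {
    // Same input/output pair as `test_block_doc_comment_2` below.
    assert_eq!(beautify("\n * Test\n * Test\n"), " Test\n Test");
}
```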
diff --git a/compiler/rustc_ast/src/util/comments/tests.rs b/compiler/rustc_ast/src/util/comments/tests.rs index e19198f863b..98ab653e45f 100644 --- a/compiler/rustc_ast/src/util/comments/tests.rs +++ b/compiler/rustc_ast/src/util/comments/tests.rs @@ -6,7 +6,7 @@ fn test_block_doc_comment_1() { with_default_session_globals(|| { let comment = "\n * Test \n ** Test\n * Test\n"; let stripped = beautify_doc_string(Symbol::intern(comment)); - assert_eq!(stripped, " Test \n* Test\n Test"); + assert_eq!(stripped.as_str(), " Test \n* Test\n Test"); }) } @@ -15,7 +15,7 @@ fn test_block_doc_comment_2() { with_default_session_globals(|| { let comment = "\n * Test\n * Test\n"; let stripped = beautify_doc_string(Symbol::intern(comment)); - assert_eq!(stripped, " Test\n Test"); + assert_eq!(stripped.as_str(), " Test\n Test"); }) } @@ -24,7 +24,7 @@ fn test_block_doc_comment_3() { with_default_session_globals(|| { let comment = "\n let a: *i32;\n *a = 5;\n"; let stripped = beautify_doc_string(Symbol::intern(comment)); - assert_eq!(stripped, " let a: *i32;\n *a = 5;"); + assert_eq!(stripped.as_str(), " let a: *i32;\n *a = 5;"); }) } @@ -32,12 +32,12 @@ fn test_block_doc_comment_3() { fn test_line_doc_comment() { with_default_session_globals(|| { let stripped = beautify_doc_string(Symbol::intern(" test")); - assert_eq!(stripped, " test"); + assert_eq!(stripped.as_str(), " test"); let stripped = beautify_doc_string(Symbol::intern("! test")); - assert_eq!(stripped, "! test"); + assert_eq!(stripped.as_str(), "! test"); let stripped = beautify_doc_string(Symbol::intern("test")); - assert_eq!(stripped, "test"); + assert_eq!(stripped.as_str(), "test"); let stripped = beautify_doc_string(Symbol::intern("!test")); - assert_eq!(stripped, "!test"); + assert_eq!(stripped.as_str(), "!test"); }) } diff --git a/compiler/rustc_builtin_macros/src/deriving/generic/mod.rs b/compiler/rustc_builtin_macros/src/deriving/generic/mod.rs index 68c11868af8..6531e68be9c 100644 --- a/compiler/rustc_builtin_macros/src/deriving/generic/mod.rs +++ b/compiler/rustc_builtin_macros/src/deriving/generic/mod.rs @@ -257,7 +257,10 @@ pub struct Substructure<'a> { pub type_ident: Ident, /// ident of the method pub method_ident: Ident, - /// dereferenced access to any `Self_` or `Ptr(Self_, _)` arguments + /// dereferenced access to any [`Self_`] or [`Ptr(Self_, _)`][ptr] arguments + /// + /// [`Self_`]: ty::Ty::Self_ + /// [ptr]: ty::Ty::Ptr pub self_args: &'a [P<Expr>], /// verbatim access to any other arguments pub nonself_args: &'a [P<Expr>], diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index ccd4d103ddb..a3a2ef04175 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -2,7 +2,7 @@ use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::temp_dir::MaybeTempDir; use rustc_fs_util::fix_windows_verbatim_for_gcc; use rustc_hir::def_id::CrateNum; -use rustc_middle::middle::cstore::{EncodedMetadata, LibSource, NativeLib}; +use rustc_middle::middle::cstore::{EncodedMetadata, LibSource}; use rustc_middle::middle::dependency_format::Linkage; use rustc_session::config::{self, CFGuard, CrateType, DebugInfo}; use rustc_session::config::{OutputFilenames, OutputType, PrintRequest, SanitizerSet}; @@ -22,7 +22,8 @@ use super::command::Command; use super::linker::{self, Linker}; use super::rpath::{self, RPathConfig}; use crate::{ - looks_like_rust_object_file, CodegenResults, CompiledModule, CrateInfo, METADATA_FILENAME, + 
looks_like_rust_object_file, CodegenResults, CompiledModule, CrateInfo, NativeLib, + METADATA_FILENAME, }; use cc::windows_registry; diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 21138f967a2..18132a2c7a3 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -766,7 +766,7 @@ impl CrateInfo { profiler_runtime: None, is_no_builtins: Default::default(), native_libraries: Default::default(), - used_libraries: tcx.native_libraries(LOCAL_CRATE), + used_libraries: tcx.native_libraries(LOCAL_CRATE).iter().map(Into::into).collect(), link_args: tcx.link_args(LOCAL_CRATE), crate_name: Default::default(), used_crates_dynamic: cstore::used_crates(tcx, LinkagePreference::RequireDynamic), @@ -787,7 +787,8 @@ impl CrateInfo { info.missing_lang_items.reserve(n_crates); for &cnum in crates.iter() { - info.native_libraries.insert(cnum, tcx.native_libraries(cnum)); + info.native_libraries + .insert(cnum, tcx.native_libraries(cnum).iter().map(Into::into).collect()); info.crate_name.insert(cnum, tcx.crate_name(cnum).to_string()); info.used_crate_source.insert(cnum, tcx.used_crate_source(cnum)); if tcx.is_panic_runtime(cnum) { diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index ee889d55241..bc93bd8b7bd 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -21,15 +21,17 @@ extern crate tracing; #[macro_use] extern crate rustc_middle; +use rustc_ast as ast; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::sync::Lrc; use rustc_hir::def_id::CrateNum; use rustc_hir::LangItem; use rustc_middle::dep_graph::WorkProduct; -use rustc_middle::middle::cstore::{CrateSource, LibSource, NativeLib}; +use rustc_middle::middle::cstore::{self, CrateSource, LibSource}; use rustc_middle::middle::dependency_format::Dependencies; use rustc_middle::ty::query::Providers; use rustc_session::config::{OutputFilenames, OutputType, RUST_CGU_EXT}; +use rustc_session::utils::NativeLibKind; use rustc_span::symbol::Symbol; use std::path::{Path, PathBuf}; @@ -105,6 +107,19 @@ bitflags::bitflags! { } } +#[derive(Clone, Debug, Encodable, Decodable, HashStable)] +pub struct NativeLib { + pub kind: NativeLibKind, + pub name: Option<Symbol>, + pub cfg: Option<ast::MetaItem>, +} + +impl From<&cstore::NativeLib> for NativeLib { + fn from(lib: &cstore::NativeLib) -> Self { + NativeLib { kind: lib.kind.clone(), name: lib.name.clone(), cfg: lib.cfg.clone() } + } +} + /// Misc info we load from metadata to persist beyond the tcx. 
/// /// Note: though `CrateNum` is only meaningful within the same tcx, information within `CrateInfo` @@ -119,9 +134,9 @@ pub struct CrateInfo { pub compiler_builtins: Option<CrateNum>, pub profiler_runtime: Option<CrateNum>, pub is_no_builtins: FxHashSet<CrateNum>, - pub native_libraries: FxHashMap<CrateNum, Lrc<Vec<NativeLib>>>, + pub native_libraries: FxHashMap<CrateNum, Vec<NativeLib>>, pub crate_name: FxHashMap<CrateNum, String>, - pub used_libraries: Lrc<Vec<NativeLib>>, + pub used_libraries: Vec<NativeLib>, pub link_args: Lrc<Vec<String>>, pub used_crate_source: FxHashMap<CrateNum, Lrc<CrateSource>>, pub used_crates_static: Vec<(CrateNum, LibSource)>, diff --git a/compiler/rustc_interface/src/interface.rs b/compiler/rustc_interface/src/interface.rs index acd49d86c2c..28eb1fed6a0 100644 --- a/compiler/rustc_interface/src/interface.rs +++ b/compiler/rustc_interface/src/interface.rs @@ -25,8 +25,9 @@ use std::sync::{Arc, Mutex}; pub type Result<T> = result::Result<T, ErrorReported>; /// Represents a compiler session. +/// /// Can be used to run `rustc_interface` queries. -/// Created by passing `Config` to `run_compiler`. +/// Created by passing [`Config`] to [`run_compiler`]. pub struct Compiler { pub(crate) sess: Lrc<Session>, codegen_backend: Lrc<Box<dyn CodegenBackend>>, diff --git a/compiler/rustc_interface/src/passes.rs b/compiler/rustc_interface/src/passes.rs index a8c8690b9e7..61ebd6d2198 100644 --- a/compiler/rustc_interface/src/passes.rs +++ b/compiler/rustc_interface/src/passes.rs @@ -96,7 +96,7 @@ declare_box_region_type!( /// harness if one is to be provided, injection of a dependency on the /// standard library and prelude, and name resolution. /// -/// Returns `None` if we're aborting after handling -W help. +/// Returns [`None`] if we're aborting after handling -W help. pub fn configure_and_expand( sess: Lrc<Session>, lint_store: Lrc<LintStore>, diff --git a/compiler/rustc_interface/src/queries.rs b/compiler/rustc_interface/src/queries.rs index 4c340b3fc1f..6ea0828cea0 100644 --- a/compiler/rustc_interface/src/queries.rs +++ b/compiler/rustc_interface/src/queries.rs @@ -23,7 +23,11 @@ use std::cell::{Ref, RefCell, RefMut}; use std::rc::Rc; /// Represent the result of a query. -/// This result can be stolen with the `take` method and generated with the `compute` method. +/// +/// This result can be stolen with the [`take`] method and generated with the [`compute`] method. 
+/// +/// [`take`]: Self::take +/// [`compute`]: Self::compute pub struct Query<T> { result: RefCell<Option<Result<T>>>, } diff --git a/compiler/rustc_lint/src/levels.rs b/compiler/rustc_lint/src/levels.rs index 3e22eba15aa..5cece569903 100644 --- a/compiler/rustc_lint/src/levels.rs +++ b/compiler/rustc_lint/src/levels.rs @@ -12,7 +12,9 @@ use rustc_hir::{intravisit, HirId}; use rustc_middle::hir::map::Map; use rustc_middle::lint::LevelSource; use rustc_middle::lint::LintDiagnosticBuilder; -use rustc_middle::lint::{struct_lint_level, LintLevelMap, LintLevelSets, LintSet, LintSource}; +use rustc_middle::lint::{ + struct_lint_level, LintLevelMap, LintLevelSets, LintLevelSource, LintSet, +}; use rustc_middle::ty::query::Providers; use rustc_middle::ty::TyCtxt; use rustc_session::lint::{builtin, Level, Lint, LintId}; @@ -91,7 +93,7 @@ impl<'s> LintLevelsBuilder<'s> { }; for id in ids { self.check_gated_lint(id, DUMMY_SP); - let src = LintSource::CommandLine(lint_flag_val, orig_level); + let src = LintLevelSource::CommandLine(lint_flag_val, orig_level); specs.insert(id, (level, src)); } } @@ -128,19 +130,19 @@ impl<'s> LintLevelsBuilder<'s> { ); diag_builder.span_label(src.span(), "overruled by previous forbid"); match old_src { - LintSource::Default => { + LintLevelSource::Default => { diag_builder.note(&format!( "`forbid` lint level is the default for {}", id.to_string() )); } - LintSource::Node(_, forbid_source_span, reason) => { + LintLevelSource::Node(_, forbid_source_span, reason) => { diag_builder.span_label(forbid_source_span, "`forbid` level set here"); if let Some(rationale) = reason { diag_builder.note(&rationale.as_str()); } } - LintSource::CommandLine(_, _) => { + LintLevelSource::CommandLine(_, _) => { diag_builder.note("`forbid` lint level was set on command line"); } } @@ -276,7 +278,7 @@ impl<'s> LintLevelsBuilder<'s> { let name = meta_item.path.segments.last().expect("empty lint name").ident.name; match store.check_lint_name(&name.as_str(), tool_name) { CheckLintNameResult::Ok(ids) => { - let src = LintSource::Node(name, li.span(), reason); + let src = LintLevelSource::Node(name, li.span(), reason); for &id in ids { self.check_gated_lint(id, attr.span); self.insert_spec(&mut specs, id, (level, src)); @@ -287,7 +289,7 @@ impl<'s> LintLevelsBuilder<'s> { match result { Ok(ids) => { let complete_name = &format!("{}::{}", tool_name.unwrap(), name); - let src = LintSource::Node( + let src = LintLevelSource::Node( Symbol::intern(complete_name), li.span(), reason, @@ -324,7 +326,7 @@ impl<'s> LintLevelsBuilder<'s> { }, ); - let src = LintSource::Node( + let src = LintLevelSource::Node( Symbol::intern(&new_lint_name), li.span(), reason, @@ -403,7 +405,7 @@ impl<'s> LintLevelsBuilder<'s> { } let (lint_attr_name, lint_attr_span) = match *src { - LintSource::Node(name, span, _) => (name, span), + LintLevelSource::Node(name, span, _) => (name, span), _ => continue, }; @@ -460,7 +462,7 @@ impl<'s> LintLevelsBuilder<'s> { } /// Find the lint level for a lint. 
- pub fn lint_level(&self, lint: &'static Lint) -> (Level, LintSource) { + pub fn lint_level(&self, lint: &'static Lint) -> (Level, LintLevelSource) { self.sets.get_lint_level(lint, self.cur, None, self.sess) } diff --git a/compiler/rustc_middle/src/dep_graph/mod.rs b/compiler/rustc_middle/src/dep_graph/mod.rs index e641c1cd77b..728bfef9f46 100644 --- a/compiler/rustc_middle/src/dep_graph/mod.rs +++ b/compiler/rustc_middle/src/dep_graph/mod.rs @@ -5,7 +5,7 @@ use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::sync::Lock; use rustc_data_structures::thin_vec::ThinVec; use rustc_errors::Diagnostic; -use rustc_hir::def_id::{DefPathHash, LocalDefId}; +use rustc_hir::def_id::LocalDefId; mod dep_node; @@ -91,9 +91,9 @@ impl<'tcx> DepContext for TyCtxt<'tcx> { type DepKind = DepKind; type StableHashingContext = StableHashingContext<'tcx>; - fn register_reused_dep_path_hash(&self, hash: DefPathHash) { + fn register_reused_dep_node(&self, dep_node: &DepNode) { if let Some(cache) = self.queries.on_disk_cache.as_ref() { - cache.register_reused_dep_path_hash(*self, hash) + cache.register_reused_dep_node(*self, dep_node) } } diff --git a/compiler/rustc_middle/src/lib.rs b/compiler/rustc_middle/src/lib.rs index cdc5940d9ba..6ae83a7f667 100644 --- a/compiler/rustc_middle/src/lib.rs +++ b/compiler/rustc_middle/src/lib.rs @@ -8,7 +8,7 @@ //! - **MIR.** The "mid-level (M) intermediate representation (IR)" is //! defined in the `mir` module. This module contains only the //! *definition* of the MIR; the passes that transform and operate -//! on MIR are found in `librustc_mir` crate. +//! on MIR are found in the `rustc_mir` crate. //! - **Types.** The internal representation of types used in rustc is //! defined in the `ty` module. This includes the **type context** //! (or `tcx`), which is the central context during most of diff --git a/compiler/rustc_middle/src/lint.rs b/compiler/rustc_middle/src/lint.rs index a61d37cc90e..64d850192f4 100644 --- a/compiler/rustc_middle/src/lint.rs +++ b/compiler/rustc_middle/src/lint.rs @@ -13,7 +13,7 @@ use rustc_span::{symbol, Span, Symbol, DUMMY_SP}; /// How a lint level was set. #[derive(Clone, Copy, PartialEq, Eq, HashStable)] -pub enum LintSource { +pub enum LintLevelSource { /// Lint is at the default level as declared /// in rustc or a plugin. Default, @@ -22,30 +22,31 @@ pub enum LintSource { Node(Symbol, Span, Option<Symbol> /* RFC 2383 reason */), /// Lint level was set by a command-line flag. - /// The provided `Level` is the level specified on the command line - - /// the actual level may be lower due to `--cap-lints` + /// The provided `Level` is the level specified on the command line. + /// (The actual level may be lower due to `--cap-lints`.) CommandLine(Symbol, Level), } -impl LintSource { +impl LintLevelSource { pub fn name(&self) -> Symbol { match *self { - LintSource::Default => symbol::kw::Default, - LintSource::Node(name, _, _) => name, - LintSource::CommandLine(name, _) => name, + LintLevelSource::Default => symbol::kw::Default, + LintLevelSource::Node(name, _, _) => name, + LintLevelSource::CommandLine(name, _) => name, } } pub fn span(&self) -> Span { match *self { - LintSource::Default => DUMMY_SP, - LintSource::Node(_, span, _) => span, - LintSource::CommandLine(_, _) => DUMMY_SP, + LintLevelSource::Default => DUMMY_SP, + LintLevelSource::Node(_, span, _) => span, + LintLevelSource::CommandLine(_, _) => DUMMY_SP, } } } -pub type LevelSource = (Level, LintSource); +/// A tuple of a lint level and its source. 
+pub type LevelSource = (Level, LintLevelSource); pub struct LintLevelSets { pub list: Vec<LintSet>, @@ -113,7 +114,7 @@ impl LintLevelSets { id: LintId, mut idx: u32, aux: Option<&FxHashMap<LintId, LevelSource>>, - ) -> (Option<Level>, LintSource) { + ) -> (Option<Level>, LintLevelSource) { if let Some(specs) = aux { if let Some(&(level, src)) = specs.get(&id) { return (Some(level), src); @@ -125,7 +126,7 @@ impl LintLevelSets { if let Some(&(level, src)) = specs.get(&id) { return (Some(level), src); } - return (None, LintSource::Default); + return (None, LintLevelSource::Default); } LintSet::Node { ref specs, parent } => { if let Some(&(level, src)) = specs.get(&id) { @@ -213,7 +214,7 @@ pub fn struct_lint_level<'s, 'd>( sess: &'s Session, lint: &'static Lint, level: Level, - src: LintSource, + src: LintLevelSource, span: Option<MultiSpan>, decorate: impl for<'a> FnOnce(LintDiagnosticBuilder<'a>) + 'd, ) { @@ -223,7 +224,7 @@ pub fn struct_lint_level<'s, 'd>( sess: &'s Session, lint: &'static Lint, level: Level, - src: LintSource, + src: LintLevelSource, span: Option<MultiSpan>, decorate: Box<dyn for<'b> FnOnce(LintDiagnosticBuilder<'b>) + 'd>, ) { @@ -274,14 +275,14 @@ pub fn struct_lint_level<'s, 'd>( let name = lint.name_lower(); match src { - LintSource::Default => { + LintLevelSource::Default => { sess.diag_note_once( &mut err, DiagnosticMessageId::from(lint), &format!("`#[{}({})]` on by default", level.as_str(), name), ); } - LintSource::CommandLine(lint_flag_val, orig_level) => { + LintLevelSource::CommandLine(lint_flag_val, orig_level) => { let flag = match orig_level { Level::Warn => "-W", Level::Deny => "-D", @@ -310,7 +311,7 @@ pub fn struct_lint_level<'s, 'd>( ); } } - LintSource::Node(lint_attr_name, src, reason) => { + LintLevelSource::Node(lint_attr_name, src, reason) => { if let Some(rationale) = reason { err.note(&rationale.as_str()); } diff --git a/compiler/rustc_middle/src/middle/privacy.rs b/compiler/rustc_middle/src/middle/privacy.rs index 254b57a005e..54188985d7c 100644 --- a/compiler/rustc_middle/src/middle/privacy.rs +++ b/compiler/rustc_middle/src/middle/privacy.rs @@ -8,7 +8,9 @@ use rustc_macros::HashStable; use std::fmt; use std::hash::Hash; -// Accessibility levels, sorted in ascending order +/// Represents the levels of accessibility an item can have. +/// +/// The variants are sorted in ascending order of accessibility. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, HashStable)] pub enum AccessLevel { /// Superset of `AccessLevel::Reachable` used to mark impl Trait items. @@ -18,13 +20,13 @@ pub enum AccessLevel { /// public, then type `T` is reachable. Its values can be obtained by other crates /// even if the type itself is not nameable. Reachable, - /// Public items + items accessible to other crates with help of `pub use` re-exports + /// Public items + items accessible to other crates with the help of `pub use` re-exports. Exported, - /// Items accessible to other crates directly, without help of re-exports + /// Items accessible to other crates directly, without the help of re-exports. Public, } -// Accessibility levels for reachable HIR nodes +/// Holds a map of accessibility levels for reachable HIR nodes. 
#[derive(Clone)] pub struct AccessLevels<Id = HirId> { pub map: FxHashMap<Id, AccessLevel>, diff --git a/compiler/rustc_middle/src/middle/region.rs b/compiler/rustc_middle/src/middle/region.rs index d060549ca81..eb48198991c 100644 --- a/compiler/rustc_middle/src/middle/region.rs +++ b/compiler/rustc_middle/src/middle/region.rs @@ -332,7 +332,7 @@ pub struct ScopeTree { pub struct YieldData { /// The `Span` of the yield. pub span: Span, - /// The number of expressions and patterns appearing before the `yield` in the body plus one. + /// The number of expressions and patterns appearing before the `yield` in the body, plus one. pub expr_and_pat_count: usize, pub source: hir::YieldSource, } @@ -449,9 +449,7 @@ impl ScopeTree { } /// Checks whether the given scope contains a `yield`. If so, - /// returns `Some((span, expr_count))` with the span of a yield we found and - /// the number of expressions and patterns appearing before the `yield` in the body + 1. - /// If there a are multiple yields in a scope, the one with the highest number is returned. + /// returns `Some(YieldData)`. If not, returns `None`. pub fn yield_in_scope(&self, scope: Scope) -> Option<YieldData> { self.yield_in_scope.get(&scope).cloned() } diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 4205e2ca5aa..9b944f202a9 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -5,7 +5,7 @@ use crate::dep_graph::{self, DepGraph, DepKind, DepNode, DepNodeExt}; use crate::hir::exports::ExportMap; use crate::ich::{NodeIdHashingMode, StableHashingContext}; use crate::infer::canonical::{Canonical, CanonicalVarInfo, CanonicalVarInfos}; -use crate::lint::{struct_lint_level, LintDiagnosticBuilder, LintSource}; +use crate::lint::{struct_lint_level, LintDiagnosticBuilder, LintLevelSource}; use crate::middle; use crate::middle::cstore::{CrateStoreDyn, EncodedMetadata}; use crate::middle::resolve_lifetime::{self, ObjectLifetimeDefault}; @@ -2559,7 +2559,7 @@ impl<'tcx> TyCtxt<'tcx> { self, lint: &'static Lint, mut id: hir::HirId, - ) -> (Level, LintSource) { + ) -> (Level, LintLevelSource) { let sets = self.lint_levels(LOCAL_CRATE); loop { if let Some(pair) = sets.level_and_source(lint, id, self.sess) { diff --git a/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs b/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs index ee6b06a1cc8..d9aebfc8293 100644 --- a/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs +++ b/compiler/rustc_middle/src/ty/inhabitedness/def_id_forest.rs @@ -17,7 +17,7 @@ pub struct DefIdForest { /// If A and B are DefIds in the `DefIdForest`, and A is a descendant /// of B, then only B will be in `root_ids`. /// We use a `SmallVec` here because (for its use for caching inhabitedness) - /// its rare that this will contain even two IDs. + /// it's rare that this will contain even two IDs. root_ids: SmallVec<[DefId; 1]>, } diff --git a/compiler/rustc_middle/src/ty/list.rs b/compiler/rustc_middle/src/ty/list.rs index 83a2bdf90f9..e657088a5e4 100644 --- a/compiler/rustc_middle/src/ty/list.rs +++ b/compiler/rustc_middle/src/ty/list.rs @@ -24,7 +24,7 @@ extern "C" { /// This means we can use pointer for both /// equality comparisons and hashing. /// -/// Unlike slices, The types contained in `List` are expected to be `Copy` +/// Unlike slices, the types contained in `List` are expected to be `Copy` /// and iterating over a `List` returns `T` instead of a reference. 
/// /// Note: `Slice` was already taken by the `Ty`. diff --git a/compiler/rustc_middle/src/ty/mod.rs b/compiler/rustc_middle/src/ty/mod.rs index c163a37c5a1..8395692446d 100644 --- a/compiler/rustc_middle/src/ty/mod.rs +++ b/compiler/rustc_middle/src/ty/mod.rs @@ -1,3 +1,14 @@ +//! Defines how the compiler represents types internally. +//! +//! Two important entities in this module are: +//! +//! - [`rustc_middle::ty::Ty`], used to represent the semantics of a type. +//! - [`rustc_middle::ty::TyCtxt`], the central data structure in the compiler. +//! +//! For more information, see ["The `ty` module: representing types"] in the rustc-dev-guide. +//! +//! ["The `ty` module: representing types"]: https://rustc-dev-guide.rust-lang.org/ty.html + // ignore-tidy-filelength pub use self::fold::{TypeFoldable, TypeFolder, TypeVisitor}; pub use self::AssocItemContainer::*; diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs index e006dfeb663..8a1165bbd64 100644 --- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs @@ -1,4 +1,4 @@ -use crate::dep_graph::{DepNodeIndex, SerializedDepNodeIndex}; +use crate::dep_graph::{DepNode, DepNodeIndex, SerializedDepNodeIndex}; use crate::mir::interpret::{AllocDecodingSession, AllocDecodingState}; use crate::mir::{self, interpret}; use crate::ty::codec::{OpaqueEncoder, RefDecodable, TyDecoder, TyEncoder}; @@ -264,6 +264,13 @@ impl<'sess> OnDiskCache<'sess> { (file_to_file_index, file_index_to_stable_id) }; + // Register any dep nodes that we reused from the previous session, + // but didn't `DepNode::construct` in this session. This ensures + // that their `DefPathHash` to `RawDefId` mappings are registered + // in 'latest_foreign_def_path_hashes' if necessary, since that + // normally happens in `DepNode::construct`. + tcx.dep_graph.register_reused_dep_nodes(tcx); + // Load everything into memory so we can write it out to the on-disk // cache. The vast majority of cacheable query results should already // be in memory, so this should be a cheap operation. @@ -467,8 +474,8 @@ impl<'sess> OnDiskCache<'sess> { .insert(hash, RawDefId { krate: def_id.krate.as_u32(), index: def_id.index.as_u32() }); } - /// If the given `hash` still exists in the current compilation, - /// calls `store_foreign_def_id` with its current `DefId`. + /// If the given `dep_node`'s hash still exists in the current compilation, + /// and its current `DefId` is foreign, calls `store_foreign_def_id` with it. /// /// Normally, `store_foreign_def_id_hash` can be called directly by /// the dependency graph when we construct a `DepNode`. However, /// when we deserialize a `DepNode` from the previous compilation /// session, we only have the `DefPathHash` available. This method is used /// to ensure that any `DepNode` that we re-use has a `DefPathHash` -> `RawDefId` written /// out for usage in the next compilation session. - pub fn register_reused_dep_path_hash(&self, tcx: TyCtxt<'tcx>, hash: DefPathHash) { - // We can't simply copy the `RawDefId` from `foreign_def_path_hashes` to - // `latest_foreign_def_path_hashes`, since the `RawDefId` might have - // changed in the current compilation session (e.g. we've added/removed crates, - // or added/removed definitions before/after the target definition). 
- if let Some(def_id) = self.def_path_hash_to_def_id(tcx, hash) { - self.store_foreign_def_id_hash(def_id, hash); + pub fn register_reused_dep_node(&self, tcx: TyCtxt<'tcx>, dep_node: &DepNode) { + // For reused dep nodes, we only need to store the mapping if the node + // is one whose query key we can reconstruct from the hash. We use the + // mapping to aid that reconstruction in the next session. While we also + // use it to decode `DefId`s we encoded in the cache as `DefPathHashes`, + // they're already registered during `DefId` encoding. + if dep_node.kind.can_reconstruct_query_key() { + let hash = DefPathHash(dep_node.hash.into()); + + // We can't simply copy the `RawDefId` from `foreign_def_path_hashes` to + // `latest_foreign_def_path_hashes`, since the `RawDefId` might have + // changed in the current compilation session (e.g. we've added/removed crates, + // or added/removed definitions before/after the target definition). + if let Some(def_id) = self.def_path_hash_to_def_id(tcx, hash) { + if !def_id.is_local() { + self.store_foreign_def_id_hash(def_id, hash); + } + } } } @@ -648,7 +666,7 @@ impl<'sess> OnDiskCache<'sess> { //- DECODING ------------------------------------------------------------------- -/// A decoder that can read from the incr. comp. cache. It is similar to the one +/// A decoder that can read from the incremental compilation cache. It is similar to the one /// we use for crate metadata decoding in that it can rebase spans and eventually /// will also handle things that contain `Ty` instances. crate struct CacheDecoder<'a, 'tcx> { @@ -936,7 +954,7 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for &'tcx [Span] { //- ENCODING ------------------------------------------------------------------- -/// An encoder that can write the incr. comp. cache. +/// An encoder that can write to the incremental compilation cache. struct CacheEncoder<'a, 'tcx, E: OpaqueEncoder> { tcx: TyCtxt<'tcx>, encoder: &'a mut E, diff --git a/compiler/rustc_middle/src/ty/sty.rs b/compiler/rustc_middle/src/ty/sty.rs index dc72a713a7d..4ce76409c6f 100644 --- a/compiler/rustc_middle/src/ty/sty.rs +++ b/compiler/rustc_middle/src/ty/sty.rs @@ -215,10 +215,7 @@ pub enum TyKind<'tcx> { impl TyKind<'tcx> { #[inline] pub fn is_primitive(&self) -> bool { - match self { - Bool | Char | Int(_) | Uint(_) | Float(_) => true, - _ => false, - } + matches!(self, Bool | Char | Int(_) | Uint(_) | Float(_)) } /// Get the article ("a" or "an") to use with this type. @@ -1572,17 +1569,11 @@ impl RegionKind { } pub fn is_late_bound(&self) -> bool { - match *self { - ty::ReLateBound(..) => true, - _ => false, - } + matches!(*self, ty::ReLateBound(..)) } pub fn is_placeholder(&self) -> bool { - match *self { - ty::RePlaceholder(..) => true, - _ => false, - } + matches!(*self, ty::RePlaceholder(..)) } pub fn bound_at_or_above_binder(&self, index: ty::DebruijnIndex) -> bool { diff --git a/compiler/rustc_mir/src/transform/coverage/mod.rs b/compiler/rustc_mir/src/transform/coverage/mod.rs index 4590d37c182..93133e9b7f0 100644 --- a/compiler/rustc_mir/src/transform/coverage/mod.rs +++ b/compiler/rustc_mir/src/transform/coverage/mod.rs @@ -30,6 +30,7 @@ use rustc_middle::mir::{ }; use rustc_middle::ty::TyCtxt; use rustc_span::def_id::DefId; +use rustc_span::source_map::SourceMap; use rustc_span::{CharPos, Pos, SourceFile, Span, Symbol}; /// A simple error message wrapper for `coverage::Error`s. 
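The coverage hunk below threads the newly imported `SourceMap` into `make_code_region` so that line numbers can be remapped when the instrumented code is a doctest, which rustdoc compiles from a synthetic file. A toy model of that remapping follows; the offset table is invented for illustration, where rustc's real `SourceMap::doctest_offset_line` derives the shift from the doctest's `FileName`:

```
use std::collections::HashMap;

// Toy stand-in for `SourceMap::doctest_offset_line`: a doctest is compiled
// from a synthetic file, so line numbers reported against it must be shifted
// by the doctest's starting line in the original document.
struct ToySourceMap {
    doctest_start_lines: HashMap<String, usize>, // file name -> starting line
}

impl ToySourceMap {
    fn doctest_offset_line(&self, file_name: &str, line: usize) -> usize {
        match self.doctest_start_lines.get(file_name) {
            Some(start) => line + start,
            None => line, // not a doctest: leave the line number alone
        }
    }
}

fn main() {
    let mut doctest_start_lines = HashMap::new();
    doctest_start_lines.insert("lib.rs - foo (line 27)".to_string(), 27);
    let map = ToySourceMap { doctest_start_lines };
    // Line 2 of the extracted doctest is line 29 of the original file.
    assert_eq!(map.doctest_offset_line("lib.rs - foo (line 27)", 2), 29);
    // Ordinary files pass through unchanged.
    assert_eq!(map.doctest_offset_line("lib.rs", 2), 2);
}
```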
@@ -311,7 +312,7 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> { self.mir_body, counter_kind, self.bcb_leader_bb(bcb), - Some(make_code_region(file_name, &self.source_file, span, body_span)), + Some(make_code_region(source_map, file_name, &self.source_file, span, body_span)), ); } } @@ -489,6 +490,7 @@ fn inject_intermediate_expression(mir_body: &mut mir::Body<'tcx>, expression: Co /// Convert the Span into its file name, start line and column, and end line and column fn make_code_region( + source_map: &SourceMap, file_name: Symbol, source_file: &Lrc<SourceFile>, span: Span, @@ -508,6 +510,8 @@ fn make_code_region( } else { source_file.lookup_file_pos(span.hi()) }; + let start_line = source_map.doctest_offset_line(&source_file.name, start_line); + let end_line = source_map.doctest_offset_line(&source_file.name, end_line); CodeRegion { file_name, start_line: start_line as u32, diff --git a/compiler/rustc_mir/src/transform/simplify_try.rs b/compiler/rustc_mir/src/transform/simplify_try.rs index bea95bf43d2..a3459887a9a 100644 --- a/compiler/rustc_mir/src/transform/simplify_try.rs +++ b/compiler/rustc_mir/src/transform/simplify_try.rs @@ -306,7 +306,7 @@ fn optimization_applies<'tcx>( return false; } - // Verify the assigment chain consists of the form b = a; c = b; d = c; etc... + // Verify the assignment chain consists of the form b = a; c = b; d = c; etc... if opt_info.field_tmp_assignments.is_empty() { trace!("NO: no assignments found"); return false; diff --git a/compiler/rustc_mir/src/util/borrowck_errors.rs b/compiler/rustc_mir/src/util/borrowck_errors.rs index 83bf7584f2e..56d8045813c 100644 --- a/compiler/rustc_mir/src/util/borrowck_errors.rs +++ b/compiler/rustc_mir/src/util/borrowck_errors.rs @@ -68,9 +68,10 @@ impl<'cx, 'tcx> crate::borrow_check::MirBorrowckCtxt<'cx, 'tcx> { err.span_label( new_loan_span, format!( - "mutable borrow starts here in previous \ - iteration of loop{}", - opt_via + "{}{} was mutably borrowed here in the previous iteration of the loop{}", + desc, + via(opt_via), + opt_via, ), ); if let Some(old_load_end_span) = old_load_end_span { diff --git a/compiler/rustc_mir/src/util/pretty.rs b/compiler/rustc_mir/src/util/pretty.rs index b6a1b652cf6..89ce29bd101 100644 --- a/compiler/rustc_mir/src/util/pretty.rs +++ b/compiler/rustc_mir/src/util/pretty.rs @@ -17,7 +17,7 @@ use rustc_middle::mir::interpret::{ }; use rustc_middle::mir::visit::Visitor; use rustc_middle::mir::*; -use rustc_middle::ty::{self, TyCtxt, TypeFoldable, TypeVisitor}; +use rustc_middle::ty::{self, TyCtxt, TyS, TypeFoldable, TypeVisitor}; use rustc_target::abi::Size; use std::ops::ControlFlow; @@ -408,6 +408,18 @@ impl ExtraComments<'tcx> { } } +fn use_verbose(ty: &&TyS<'tcx>) -> bool { + match ty.kind() { + ty::Int(_) | ty::Uint(_) | ty::Bool | ty::Char | ty::Float(_) => false, + // Unit type + ty::Tuple(g_args) if g_args.is_empty() => false, + ty::Tuple(g_args) => g_args.iter().any(|g_arg| use_verbose(&g_arg.expect_ty())), + ty::Array(ty, _) => use_verbose(ty), + ty::FnDef(..) => false, + _ => true, + } +} + impl Visitor<'tcx> for ExtraComments<'tcx> { fn visit_constant(&mut self, constant: &Constant<'tcx>, location: Location) { self.super_constant(constant, location); @@ -430,16 +442,10 @@ impl Visitor<'tcx> for ExtraComments<'tcx> { fn visit_const(&mut self, constant: &&'tcx ty::Const<'tcx>, _: Location) { self.super_const(constant); let ty::Const { ty, val, .. 
} = constant; - match ty.kind() { - ty::Int(_) | ty::Uint(_) | ty::Bool | ty::Char | ty::Float(_) => {} - // Unit type - ty::Tuple(tys) if tys.is_empty() => {} - ty::FnDef(..) => {} - _ => { - self.push("ty::Const"); - self.push(&format!("+ ty: {:?}", ty)); - self.push(&format!("+ val: {:?}", val)); - } + if use_verbose(ty) { + self.push("ty::Const"); + self.push(&format!("+ ty: {:?}", ty)); + self.push(&format!("+ val: {:?}", val)); } } diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index dd0c61a2882..d79dd97a69a 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -1,6 +1,47 @@ -//! This module provides functions to deconstruct and reconstruct patterns into a constructor -//! applied to some fields. This is used by the `_match` module to compute pattern -//! usefulness/exhaustiveness. +//! [`super::usefulness`] explains most of what is happening in this file. As explained there, +//! values and patterns are made from constructors applied to fields. This file defines a +//! `Constructor` enum, a `Fields` struct, and various operations to manipulate them and convert +//! them from/to patterns. +//! +//! There's one idea that is not detailed in [`super::usefulness`] because the details are not +//! needed there: _constructor splitting_. +//! +//! # Constructor splitting +//! +//! The idea is as follows: given a constructor `c` and a matrix, we want to specialize in turn +//! with all the value constructors that are covered by `c`, and compute usefulness for each. +//! Instead of listing all those constructors (which is intractable), we group those value +//! constructors together as much as possible. Example: +//! +//! ``` +//! match (0, false) { +//! (0 ..=100, true) => {} // `p_1` +//! (50..=150, false) => {} // `p_2` +//! (0 ..=200, _) => {} // `q` +//! } +//! ``` +//! +//! The naive approach would try all numbers in the range `0..=200`. But we can be a lot more +//! clever: `0` and `1` for example will match the exact same rows, and return equivalent +//! witnesses. In fact all of `0..50` would. We can thus restrict our exploration to 4 +//! constructors: `0..50`, `50..=100`, `101..=150` and `151..=200`. That is enough and infinitely +//! more tractable. +//! +//! We capture this idea in a function `split(p_1 ... p_n, c)` which returns a list of constructors +//! `c'` covered by `c`. Given such a `c'`, we require that all value ctors `c''` covered by `c'` +//! return an equivalent set of witnesses after specializing and computing usefulness. +//! In the example above, witnesses for specializing by `c''` covered by `0..50` will only differ +//! in their first element. +//! +//! We usually also ask that the `c'` together cover all of the original `c`. However, we allow +//! skipping some constructors as long as it doesn't change whether the resulting list of witnesses +//! is empty or not. We use this in the wildcard `_` case. +//! +//! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for +//! or-patterns; instead we just try the alternatives one-by-one. For details on splitting +//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`SplitIntRange`]; for slices, see +//! [`SplitVarLenSlice`]. 
+ use self::Constructor::*; use self::SliceKind::*; @@ -24,7 +65,7 @@ use rustc_target::abi::{Integer, Size, VariantIdx}; use smallvec::{smallvec, SmallVec}; use std::cmp::{self, max, min, Ordering}; -use std::iter::IntoIterator; +use std::iter::{once, IntoIterator}; use std::ops::RangeInclusive; /// An inclusive interval, used for precise integer exhaustiveness checking. @@ -183,126 +224,39 @@ impl IntRange { Pat { ty, span: DUMMY_SP, kind: Box::new(kind) } } - /// For exhaustive integer matching, some constructors are grouped within other constructors - /// (namely integer typed values are grouped within ranges). However, when specialising these - /// constructors, we want to be specialising for the underlying constructors (the integers), not - /// the groups (the ranges). Thus we need to split the groups up. Splitting them up naïvely would - /// mean creating a separate constructor for every single value in the range, which is clearly - /// impractical. However, observe that for some ranges of integers, the specialisation will be - /// identical across all values in that range (i.e., there are equivalence classes of ranges of - /// constructors based on their `U(S(c, P), S(c, p))` outcome). These classes are grouped by - /// the patterns that apply to them (in the matrix `P`). We can split the range whenever the - /// patterns that apply to that range (specifically: the patterns that *intersect* with that range) - /// change. - /// Our solution, therefore, is to split the range constructor into subranges at every single point - /// the group of intersecting patterns changes (using the method described below). - /// And voilà ! We're testing precisely those ranges that we need to, without any exhaustive matching - /// on actual integers. The nice thing about this is that the number of subranges is linear in the - /// number of rows in the matrix (i.e., the number of cases in the `match` statement), so we don't - /// need to be worried about matching over gargantuan ranges. - /// - /// Essentially, given the first column of a matrix representing ranges, looking like the following: - /// - /// |------| |----------| |-------| || - /// |-------| |-------| |----| || - /// |---------| - /// - /// We split the ranges up into equivalence classes so the ranges are no longer overlapping: - /// - /// |--|--|||-||||--||---|||-------| |-|||| || - /// - /// The logic for determining how to split the ranges is fairly straightforward: we calculate - /// boundaries for each interval range, sort them, then create constructors for each new interval - /// between every pair of boundary points. (This essentially sums up to performing the intuitive - /// merging operation depicted above.) - fn split<'p, 'tcx>( + /// Lint on likely incorrect range patterns (#63987) + pub(super) fn lint_overlapping_range_endpoints<'a, 'tcx: 'a>( &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - hir_id: Option<HirId>, - ) -> SmallVec<[Constructor<'tcx>; 1]> { - /// Represents a border between 2 integers. Because the intervals spanning borders - /// must be able to cover every integer, we need to be able to represent - /// 2^128 + 1 such borders. - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] - enum Border { - JustBefore(u128), - AfterMax, + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator<Item = (&'a Constructor<'tcx>, Span)>, + column_count: usize, + hir_id: HirId, + ) { + if self.is_singleton() { + return; } - // A function for extracting the borders of an integer interval. 
- fn range_borders(r: IntRange) -> impl Iterator<Item = Border> { - let (lo, hi) = r.range.into_inner(); - let from = Border::JustBefore(lo); - let to = match hi.checked_add(1) { - Some(m) => Border::JustBefore(m), - None => Border::AfterMax, - }; - vec![from, to].into_iter() + if column_count != 1 { + // FIXME: for now, only check for overlapping ranges on simple range + // patterns. Otherwise with the current logic the following is detected + // as overlapping: + // ``` + // match (0u8, true) { + // (0 ..= 125, false) => {} + // (125 ..= 255, true) => {} + // _ => {} + // } + // ``` + return; } - // Collect the span and range of all the intersecting ranges to lint on likely - // incorrect range patterns. (#63987) - let mut overlaps = vec![]; - let row_len = pcx.matrix.column_count().unwrap_or(0); - // `borders` is the set of borders between equivalence classes: each equivalence - // class lies between 2 borders. - let row_borders = pcx - .matrix - .head_ctors_and_spans(pcx.cx) + let overlaps: Vec<_> = ctors .filter_map(|(ctor, span)| Some((ctor.as_int_range()?, span))) - .filter_map(|(range, span)| { - let intersection = self.intersection(&range); - let should_lint = self.suspicious_intersection(&range); - if let (Some(range), 1, true) = (&intersection, row_len, should_lint) { - // FIXME: for now, only check for overlapping ranges on simple range - // patterns. Otherwise with the current logic the following is detected - // as overlapping: - // ``` - // match (0u8, true) { - // (0 ..= 125, false) => {} - // (125 ..= 255, true) => {} - // _ => {} - // } - // ``` - overlaps.push((range.clone(), span)); - } - intersection - }) - .flat_map(range_borders); - let self_borders = range_borders(self.clone()); - let mut borders: Vec<_> = row_borders.chain(self_borders).collect(); - borders.sort_unstable(); - - self.lint_overlapping_range_endpoints(pcx, hir_id, overlaps); - - // We're going to iterate through every adjacent pair of borders, making sure that - // each represents an interval of nonnegative length, and convert each such - // interval into a constructor. - borders - .array_windows() - .filter_map(|&pair| match pair { - [Border::JustBefore(n), Border::JustBefore(m)] => { - if n < m { - Some(n..=(m - 1)) - } else { - None - } - } - [Border::JustBefore(n), Border::AfterMax] => Some(n..=u128::MAX), - [Border::AfterMax, _] => None, - }) - .map(|range| IntRange { range }) - .map(IntRange) - .collect() - } + .filter(|(range, _)| self.suspicious_intersection(range)) + .map(|(range, span)| (self.intersection(&range).unwrap(), span)) + .collect(); - fn lint_overlapping_range_endpoints( - &self, - pcx: PatCtxt<'_, '_, '_>, - hir_id: Option<HirId>, - overlaps: Vec<(IntRange, Span)>, - ) { - if let (true, Some(hir_id)) = (!overlaps.is_empty(), hir_id) { + if !overlaps.is_empty() { pcx.cx.tcx.struct_span_lint_hir( lint::builtin::OVERLAPPING_RANGE_ENDPOINTS, hir_id, @@ -339,6 +293,101 @@ impl IntRange { } } +/// Represents a border between 2 integers. Because the intervals spanning borders must be able to +/// cover every integer, we need to be able to represent 2^128 + 1 such borders. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum IntBorder { + JustBefore(u128), + AfterMax, +} + +/// A range of integers that is partitioned into disjoint subranges. This does constructor +/// splitting for integer ranges as explained at the top of the file. 
+/// +/// This is fed multiple ranges, and returns an output that covers the input, but is split so that +/// the only intersections between an output range and a seen range are inclusions. No output range +/// straddles the boundary of one of the inputs. +/// +/// The following input: +/// ``` +/// |-------------------------| // `self` +/// |------| |----------| |----| +/// |-------| |-------| +/// ``` +/// would be iterated over as follows: +/// ``` +/// ||---|--||-|---|---|---|--| +/// ``` +#[derive(Debug, Clone)] +struct SplitIntRange { + /// The range we are splitting + range: IntRange, + /// The borders of ranges we have seen. They are all contained within `range`. This is kept + /// sorted. + borders: Vec<IntBorder>, +} + +impl SplitIntRange { + fn new(r: IntRange) -> Self { + SplitIntRange { range: r.clone(), borders: Vec::new() } + } + + /// Internal use + fn to_borders(r: IntRange) -> [IntBorder; 2] { + use IntBorder::*; + let (lo, hi) = r.boundaries(); + let lo = JustBefore(lo); + let hi = match hi.checked_add(1) { + Some(m) => JustBefore(m), + None => AfterMax, + }; + [lo, hi] + } + + /// Add ranges relative to which we split. + fn split(&mut self, ranges: impl Iterator<Item = IntRange>) { + let this_range = &self.range; + let included_ranges = ranges.filter_map(|r| this_range.intersection(&r)); + let included_borders = included_ranges.flat_map(|r| { + let borders = Self::to_borders(r); + once(borders[0]).chain(once(borders[1])) + }); + self.borders.extend(included_borders); + self.borders.sort_unstable(); + } + + /// Iterate over the contained ranges. + fn iter<'a>(&'a self) -> impl Iterator<Item = IntRange> + Captures<'a> { + use IntBorder::*; + + let self_range = Self::to_borders(self.range.clone()); + // Start with the start of the range. + let mut prev_border = self_range[0]; + self.borders + .iter() + .copied() + // End with the end of the range. + .chain(once(self_range[1])) + // List pairs of adjacent borders. + .map(move |border| { + let ret = (prev_border, border); + prev_border = border; + ret + }) + // Skip duplicates. + .filter(|(prev_border, border)| prev_border != border) + // Finally, convert to ranges. + .map(|(prev_border, border)| { + let range = match (prev_border, border) { + (JustBefore(n), JustBefore(m)) if n < m => n..=(m - 1), + (JustBefore(n), AfterMax) => n..=u128::MAX, + _ => unreachable!(), // Ruled out by the sorting and filtering we did + }; + IntRange { range } + }) + } +} + #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum SliceKind { /// Patterns of length `n` (`[x, y]`). @@ -391,129 +440,141 @@ impl Slice { self.kind.arity() } - /// The exhaustiveness-checking paper does not include any details on - /// checking variable-length slice patterns. However, they may be - /// matched by an infinite collection of fixed-length array patterns. - /// - /// Checking the infinite set directly would take an infinite amount - /// of time. However, it turns out that for each finite set of - /// patterns `P`, all sufficiently large array lengths are equivalent: - /// - /// Each slice `s` with a "sufficiently-large" length `l ≥ L` that applies - /// to exactly the subset `Pₜ` of `P` can be transformed to a slice - /// `sₘ` for each sufficiently-large length `m` that applies to exactly - /// the same subset of `P`. 
- /// - /// Because of that, each witness for reachability-checking of one - /// of the sufficiently-large lengths can be transformed to an - /// equally-valid witness of any other length, so we only have - /// to check slices of the "minimal sufficiently-large length" - /// and less. - /// - /// Note that the fact that there is a *single* `sₘ` for each `m` - /// not depending on the specific pattern in `P` is important: if - /// you look at the pair of patterns - /// `[true, ..]` - /// `[.., false]` - /// Then any slice of length ≥1 that matches one of these two - /// patterns can be trivially turned to a slice of any - /// other length ≥1 that matches them and vice-versa, - /// but the slice of length 2 `[false, true]` that matches neither - /// of these patterns can't be turned to a slice from length 1 that - /// matches neither of these patterns, so we have to consider - /// slices from length 2 there. - /// - /// Now, to see that that length exists and find it, observe that slice - /// patterns are either "fixed-length" patterns (`[_, _, _]`) or - /// "variable-length" patterns (`[_, .., _]`). - /// - /// For fixed-length patterns, all slices with lengths *longer* than - /// the pattern's length have the same outcome (of not matching), so - /// as long as `L` is greater than the pattern's length we can pick - /// any `sₘ` from that length and get the same result. - /// - /// For variable-length patterns, the situation is more complicated, - /// because as seen above the precise value of `sₘ` matters. - /// - /// However, for each variable-length pattern `p` with a prefix of length - /// `plₚ` and suffix of length `slₚ`, only the first `plₚ` and the last - /// `slₚ` elements are examined. - /// - /// Therefore, as long as `L` is positive (to avoid concerns about empty - /// types), all elements after the maximum prefix length and before - /// the maximum suffix length are not examined by any variable-length - /// pattern, and therefore can be added/removed without affecting - /// them - creating equivalent patterns from any sufficiently-large - /// length. - /// - /// Of course, if fixed-length patterns exist, we must be sure - /// that our length is large enough to miss them all, so - /// we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` - /// - /// for example, with the above pair of patterns, all elements - /// but the first and last can be added/removed, so any - /// witness of length ≥2 (say, `[false, false, true]`) can be - /// turned to a witness from any other length ≥2. - fn split<'p, 'tcx>(self, pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { - let (self_prefix, self_suffix) = match self.kind { - VarLen(self_prefix, self_suffix) => (self_prefix, self_suffix), - _ => return smallvec![Slice(self)], - }; + /// See `Constructor::is_covered_by` + fn is_covered_by(self, other: Self) -> bool { + other.kind.covers_length(self.arity()) + } +} + +/// This computes constructor splitting for variable-length slices, as explained at the top of the +/// file. +/// +/// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, _, +/// _, y] | ...`. The corresponding value constructors are fixed-length array constructors above a +/// given minimum length. We obviously can't list this infinitude of constructors. Thankfully, +/// it turns out that for each finite set of slice patterns, all sufficiently large array lengths +/// are equivalent. 
+/// +/// Let's look at an example where we are trying to split the last pattern: +/// ``` +/// match x { +/// [true, true, ..] => {} +/// [.., false, false] => {} +/// [..] => {} +/// } +/// ``` +/// Here are the results of specialization for the first few lengths: +/// ``` +/// // length 0 +/// [] => {} +/// // length 1 +/// [_] => {} +/// // length 2 +/// [true, true] => {} +/// [false, false] => {} +/// [_, _] => {} +/// // length 3 +/// [true, true, _ ] => {} +/// [_, false, false] => {} +/// [_, _, _ ] => {} +/// // length 4 +/// [true, true, _, _ ] => {} +/// [_, _, false, false] => {} +/// [_, _, _, _ ] => {} +/// // length 5 +/// [true, true, _, _, _ ] => {} +/// [_, _, _, false, false] => {} +/// [_, _, _, _, _ ] => {} +/// ``` +/// +/// If we went above length 5, we would simply be inserting more columns full of wildcards in the +/// middle. This means that the set of witnesses for length `l >= 5` is equivalent to the set for +/// any other `l' >= 5`: simply add or remove wildcards in the middle to convert between them. +/// +/// This applies to any set of slice patterns: there will be a length `L` above which all lengths +/// behave the same. This is exactly what we need for constructor splitting. Therefore a +/// variable-length slice can be split into a variable-length slice of minimal length `L`, and many +/// fixed-length slices of lengths `< L`. +/// +/// For each variable-length pattern `p` with a prefix of length `plₚ` and suffix of length `slₚ`, +/// only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as long as `L` is +/// positive (to avoid concerns about empty types), all elements after the maximum prefix length +/// and before the maximum suffix length are not examined by any variable-length pattern, and +/// therefore can be added/removed without affecting them - creating equivalent patterns from any +/// sufficiently-large length. +/// +/// Of course, if fixed-length patterns exist, we must be sure that our length is large enough to +/// miss them all, so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` +/// +/// `max_slice` below will be made to have arity `L`. +#[derive(Debug)] +struct SplitVarLenSlice { + /// If the type is an array, this is its size. + array_len: Option<u64>, + /// The arity of the input slice. + arity: u64, + /// The smallest slice bigger than any slice seen. `max_slice.arity()` is the length `L` + /// described above. + max_slice: SliceKind, +} - let head_ctors = pcx.matrix.head_ctors(pcx.cx).filter(|c| !c.is_wildcard()); +impl SplitVarLenSlice { + fn new(prefix: u64, suffix: u64, array_len: Option<u64>) -> Self { + SplitVarLenSlice { array_len, arity: prefix + suffix, max_slice: VarLen(prefix, suffix) } + } - let mut max_prefix_len = self_prefix; - let mut max_suffix_len = self_suffix; + /// Pass a set of slices relative to which to split this one. + fn split(&mut self, slices: impl Iterator<Item = SliceKind>) { + let (max_prefix_len, max_suffix_len) = match &mut self.max_slice { + VarLen(prefix, suffix) => (prefix, suffix), + FixedLen(_) => return, // No need to split + }; + // We grow `self.max_slice` to be larger than all slices encountered, as described above. + // For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that + // `L = max_prefix_len + max_suffix_len`. 
let mut max_fixed_len = 0; - - for ctor in head_ctors { - if let Slice(slice) = ctor { - match slice.kind { - FixedLen(len) => { - max_fixed_len = cmp::max(max_fixed_len, len); - } - VarLen(prefix, suffix) => { - max_prefix_len = cmp::max(max_prefix_len, prefix); - max_suffix_len = cmp::max(max_suffix_len, suffix); - } + for slice in slices { + match slice { + FixedLen(len) => { + max_fixed_len = cmp::max(max_fixed_len, len); + } + VarLen(prefix, suffix) => { + *max_prefix_len = cmp::max(*max_prefix_len, prefix); + *max_suffix_len = cmp::max(*max_suffix_len, suffix); } - } else { - bug!("unexpected ctor for slice type: {:?}", ctor); } } - - // For diagnostics, we keep the prefix and suffix lengths separate, so in the case - // where `max_fixed_len + 1` is the largest, we adapt `max_prefix_len` accordingly, - // so that `L = max_prefix_len + max_suffix_len`. - if max_fixed_len + 1 >= max_prefix_len + max_suffix_len { + // We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and + // suffix separate. + if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len { // The subtraction can't overflow thanks to the above check. - // The new `max_prefix_len` is also guaranteed to be larger than its previous - // value. - max_prefix_len = max_fixed_len + 1 - max_suffix_len; + // The new `max_prefix_len` is larger than its previous value. + *max_prefix_len = max_fixed_len + 1 - *max_suffix_len; } - let final_slice = VarLen(max_prefix_len, max_suffix_len); - let final_slice = Slice::new(self.array_len, final_slice); + // We cap the arity of `max_slice` at the array size. match self.array_len { - Some(_) => smallvec![Slice(final_slice)], - None => { - // `self` originally covered the range `(self.arity()..infinity)`. We split that - // range into two: lengths smaller than `final_slice.arity()` are treated - // independently as fixed-lengths slices, and lengths above are captured by - // `final_slice`. - let smaller_lengths = (self.arity()..final_slice.arity()).map(FixedLen); - smaller_lengths - .map(|kind| Slice::new(self.array_len, kind)) - .chain(Some(final_slice)) - .map(Slice) - .collect() - } + Some(len) if self.max_slice.arity() >= len => self.max_slice = FixedLen(len), + _ => {} } } - /// See `Constructor::is_covered_by` - fn is_covered_by(self, other: Self) -> bool { - other.kind.covers_length(self.arity()) + /// Iterate over the partition of this slice. + fn iter<'a>(&'a self) -> impl Iterator<Item = Slice> + Captures<'a> { + let smaller_lengths = match self.array_len { + // The only admissible fixed-length slice is one of the array size. Whether `max_slice` + // is fixed-length or variable-length, it will be the only relevant slice to output + // here. + Some(_) => (0..0), // empty range + // We cover all arities in the range `(self.arity..infinity)`. We split that range into + // two: lengths smaller than `max_slice.arity()` are treated independently as + // fixed-length slices, and lengths above are captured by `max_slice`. + None => self.arity..self.max_slice.arity(), + }; + smaller_lengths + .map(FixedLen) + .chain(once(self.max_slice)) + .map(move |kind| Slice::new(self.array_len, kind)) } } @@ -546,6 +607,9 @@ pub(super) enum Constructor<'tcx> { /// Fake extra constructor for enums that aren't allowed to be matched exhaustively. Also used /// for those types for which we cannot list constructors explicitly, like `f64` and `str`. 
NonExhaustive, + /// Stands for constructors that are not seen in the matrix, as explained in the documentation + /// for [`SplitWildcard`]. + Missing, /// Wildcard pattern. Wildcard, } @@ -652,48 +716,41 @@ impl<'tcx> Constructor<'tcx> { /// This function may discard some irrelevant constructors if this preserves behavior and /// diagnostics. Eg. for the `_` case, we ignore the constructors already present in the /// matrix, unless all of them are. - /// - /// `hir_id` is `None` when we're evaluating the wildcard pattern. In that case we do not want - /// to lint for overlapping ranges. - pub(super) fn split<'p>( + pub(super) fn split<'a>( &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - hir_id: Option<HirId>, - ) -> SmallVec<[Self; 1]> { - debug!("Constructor::split({:#?}, {:#?})", self, pcx.matrix); + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator<Item = &'a Constructor<'tcx>> + Clone, + ) -> SmallVec<[Self; 1]> + where + 'tcx: 'a, + { + debug!("Constructor::split({:#?})", self); match self { - Wildcard => Constructor::split_wildcard(pcx), + Wildcard => { + let mut split_wildcard = SplitWildcard::new(pcx); + split_wildcard.split(pcx, ctors); + split_wildcard.into_ctors(pcx) + } // Fast-track if the range is trivial. In particular, we don't do the overlapping // ranges check. - IntRange(ctor_range) if !ctor_range.is_singleton() => ctor_range.split(pcx, hir_id), - Slice(slice @ Slice { kind: VarLen(..), .. }) => slice.split(pcx), + IntRange(ctor_range) if !ctor_range.is_singleton() => { + let mut split_range = SplitIntRange::new(ctor_range.clone()); + let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()); + split_range.split(int_ranges.cloned()); + split_range.iter().map(IntRange).collect() + } + &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { + let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); + let slices = ctors.filter_map(|c| c.as_slice()).map(|s| s.kind); + split_self.split(slices); + split_self.iter().map(Slice).collect() + } // Any other constructor can be used unchanged. _ => smallvec![self.clone()], } } - /// For wildcards, there are two groups of constructors: there are the constructors actually - /// present in the matrix (`head_ctors`), and the constructors not present (`missing_ctors`). - /// Two constructors that are not in the matrix will either both be caught (by a wildcard), or - /// both not be caught. Therefore we can keep the missing constructors grouped together. - fn split_wildcard<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> SmallVec<[Self; 1]> { - // Missing constructors are those that are not matched by any non-wildcard patterns in the - // current column. We only fully construct them on-demand, because they're rarely used and - // can be big. - let missing_ctors = MissingConstructors::new(pcx); - if missing_ctors.is_empty(pcx) { - // All the constructors are present in the matrix, so we just go through them all. - // We must also split them first. - missing_ctors.all_ctors - } else { - // Some constructors are missing, thus we can specialize with the wildcard constructor, - // which will stand for those constructors that are missing, and behaves like any of - // them. - smallvec![Wildcard] - } - } - /// Returns whether `self` is covered by `other`, i.e. whether `self` is a subset of `other`. /// For the simple cases, this is simply checking for equality. For the "grouped" constructors, /// this checks for inclusion. 
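As an aside, the arithmetic performed by `SplitVarLenSlice::split` above fits in a few lines on its own. The following standalone sketch (a toy `Kind` enum standing in for `SliceKind`; illustrative only, not the compiler's types) computes the length `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` from the doc comment:

```rust
/// Illustrative stand-in for `SliceKind`, keeping only what the length
/// computation needs.
#[derive(Clone, Copy)]
enum Kind {
    /// A fixed-length slice pattern like `[a, b, c]`.
    FixedLen(u64),
    /// A variable-length slice pattern like `[a, .., b]`: (prefix, suffix) lengths.
    VarLen(u64, u64),
}

/// Grow a variable-length slice `(prefix, suffix)` until its arity reaches the
/// length `L` described above, relative to the other slices in the column.
fn split_len(mut prefix: u64, mut suffix: u64, seen: &[Kind]) -> u64 {
    let mut max_fixed_len = 0;
    for &kind in seen {
        match kind {
            Kind::FixedLen(len) => max_fixed_len = max_fixed_len.max(len),
            Kind::VarLen(p, s) => {
                prefix = prefix.max(p);
                suffix = suffix.max(s);
            }
        }
    }
    // `L = max(max_fixed_len + 1, prefix + suffix)`, keeping the prefix/suffix
    // split; the subtraction cannot underflow under this check.
    if max_fixed_len + 1 >= prefix + suffix {
        prefix = max_fixed_len + 1 - suffix;
    }
    prefix + suffix
}

fn main() {
    // The example from the `SplitVarLenSlice` doc comment: `[true, true, ..]`
    // contributes `VarLen(2, 0)` and `[.., false, false]` contributes
    // `VarLen(0, 2)`, so `L = max(0 + 1, 2 + 2) = 4`.
    assert_eq!(split_len(0, 0, &[Kind::VarLen(2, 0), Kind::VarLen(0, 2)]), 4);
    // Adding a fixed-length pattern of length 5 pushes `L` up to `5 + 1 = 6`.
    let seen = [Kind::VarLen(2, 0), Kind::VarLen(0, 2), Kind::FixedLen(5)];
    assert_eq!(split_len(0, 0, &seen), 6);
}
```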
@@ -704,8 +761,8 @@ impl<'tcx> Constructor<'tcx> { match (self, other) { // Wildcards cover anything (_, Wildcard) => true, - // Wildcards are only covered by wildcards - (Wildcard, _) => false, + // The missing ctors are not covered by anything in the matrix except wildcards. + (Missing | Wildcard, _) => false, (Single, Single) => true, (Variant(self_id), Variant(other_id)) => self_id == other_id, @@ -778,247 +835,253 @@ impl<'tcx> Constructor<'tcx> { .any(|other| slice.is_covered_by(other)), // This constructor is never covered by anything else NonExhaustive => false, - Str(..) | FloatRange(..) | Opaque | Wildcard => { + Str(..) | FloatRange(..) | Opaque | Missing | Wildcard => { span_bug!(pcx.span, "found unexpected ctor in all_ctors: {:?}", self) } } } } -/// This determines the set of all possible constructors of a pattern matching -/// values of type `left_ty`. For vectors, this would normally be an infinite set -/// but is instead bounded by the maximum fixed length of slice patterns in -/// the column of patterns being analyzed. +/// A wildcard constructor that we split relative to the constructors in the matrix, as explained +/// at the top of the file. /// -/// We make sure to omit constructors that are statically impossible. E.g., for -/// `Option<!>`, we do not include `Some(_)` in the returned list of constructors. -/// Invariant: this returns an empty `Vec` if and only if the type is uninhabited (as determined by -/// `cx.is_uninhabited()`). -fn all_constructors<'p, 'tcx>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Vec<Constructor<'tcx>> { - debug!("all_constructors({:?})", pcx.ty); - let cx = pcx.cx; - let make_range = |start, end| { - IntRange( - // `unwrap()` is ok because we know the type is an integer. - IntRange::from_range(cx.tcx, start, end, pcx.ty, &RangeEnd::Included).unwrap(), - ) - }; - match pcx.ty.kind() { - ty::Bool => vec![make_range(0, 1)], - ty::Array(sub_ty, len) if len.try_eval_usize(cx.tcx, cx.param_env).is_some() => { - let len = len.eval_usize(cx.tcx, cx.param_env); - if len != 0 && cx.is_uninhabited(sub_ty) { - vec![] - } else { - vec![Slice(Slice::new(Some(len), VarLen(0, 0)))] - } - } - // Treat arrays of a constant but unknown length like slices. - ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { - let kind = if cx.is_uninhabited(sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; - vec![Slice(Slice::new(None, kind))] - } - ty::Adt(def, substs) if def.is_enum() => { - // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an - // additional "unknown" constructor. - // There is no point in enumerating all possible variants, because the user can't - // actually match against them all themselves. So we always return only the fictitious - // constructor. - // E.g., in an example like: - // - // ``` - // let err: io::ErrorKind = ...; - // match err { - // io::ErrorKind::NotFound => {}, - // } - // ``` - // - // we don't want to show every possible IO error, but instead have only `_` as the - // witness. - let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(pcx.ty); - - // If `exhaustive_patterns` is disabled and our scrutinee is an empty enum, we treat it - // as though it had an "unknown" constructor to avoid exposing its emptiness. The - // exception is if the pattern is at the top level, because we want empty matches to be - // considered exhaustive. 
- let is_secretly_empty = def.variants.is_empty()
- && !cx.tcx.features().exhaustive_patterns
- && !pcx.is_top_level;
-
- if is_secretly_empty || is_declared_nonexhaustive {
- vec![NonExhaustive]
- } else if cx.tcx.features().exhaustive_patterns {
- // If `exhaustive_patterns` is enabled, we exclude variants known to be
- // uninhabited.
- def.variants
- .iter()
- .filter(|v| {
- !v.uninhabited_from(cx.tcx, substs, def.adt_kind(), cx.param_env)
- .contains(cx.tcx, cx.module)
- })
- .map(|v| Variant(v.def_id))
- .collect()
- } else {
- def.variants.iter().map(|v| Variant(v.def_id)).collect()
- }
- }
- ty::Char => {
- vec![
- // The valid Unicode Scalar Value ranges.
- make_range('\u{0000}' as u128, '\u{D7FF}' as u128),
- make_range('\u{E000}' as u128, '\u{10FFFF}' as u128),
- ]
- }
- ty::Int(_) | ty::Uint(_)
- if pcx.ty.is_ptr_sized_integral()
- && !cx.tcx.features().precise_pointer_size_matching =>
- {
- // `usize`/`isize` are not allowed to be matched exhaustively unless the
- // `precise_pointer_size_matching` feature is enabled. So we treat those types like
- // `#[non_exhaustive]` enums by returning a special unmatcheable constructor.
- vec![NonExhaustive]
- }
- &ty::Int(ity) => {
- let bits = Integer::from_attr(&cx.tcx, SignedInt(ity)).size().bits() as u128;
- let min = 1u128 << (bits - 1);
- let max = min - 1;
- vec![make_range(min, max)]
- }
- &ty::Uint(uty) => {
- let size = Integer::from_attr(&cx.tcx, UnsignedInt(uty)).size();
- let max = size.truncate(u128::MAX);
- vec![make_range(0, max)]
- }
- // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot
- // expose its emptiness. The exception is if the pattern is at the top level, because we
- // want empty matches to be considered exhaustive.
- ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => {
- vec![NonExhaustive]
- }
- ty::Never => vec![],
- _ if cx.is_uninhabited(pcx.ty) => vec![],
- ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => vec![Single],
- // This type is one for which we cannot list constructors, like `str` or `f64`.
- _ => vec![NonExhaustive],
- }
-}
-
-// A struct to compute a set of constructors equivalent to `all_ctors \ used_ctors`.
+/// A constructor that is not present in the matrix rows will only be covered by the rows that have
+/// wildcards. Thus we can group all of those constructors together; we call them "missing
+/// constructors". Splitting a wildcard would therefore list all present constructors individually
+/// (or grouped if they are integers or slices), and then all missing constructors together as a
+/// group.
+///
+/// However, we can go further: since any constructor will match the wildcard rows, and having more
+/// rows can only reduce the number of usefulness witnesses, we can skip the present constructors
+/// and only try the missing ones.
+/// This will not preserve the whole list of witnesses, but will preserve whether the list is empty
+/// or not. In fact this is quite natural from the point of view of diagnostics too. This is done
+/// in `into_ctors`: in some cases we only return `Missing`.
#[derive(Debug)]
-pub(super) struct MissingConstructors<'tcx> {
+pub(super) struct SplitWildcard<'tcx> {
+ /// Constructors seen in the matrix.
+ matrix_ctors: Vec<Constructor<'tcx>>, + /// All the constructors for this type all_ctors: SmallVec<[Constructor<'tcx>; 1]>, - used_ctors: Vec<Constructor<'tcx>>, } -impl<'tcx> MissingConstructors<'tcx> { +impl<'tcx> SplitWildcard<'tcx> { pub(super) fn new<'p>(pcx: PatCtxt<'_, 'p, 'tcx>) -> Self { - let used_ctors: Vec<Constructor<'_>> = - pcx.matrix.head_ctors(pcx.cx).cloned().filter(|c| !c.is_wildcard()).collect(); - // Since `all_ctors` never contains wildcards, this won't recurse further. - let all_ctors = - all_constructors(pcx).into_iter().flat_map(|ctor| ctor.split(pcx, None)).collect(); + debug!("SplitWildcard::new({:?})", pcx.ty); + let cx = pcx.cx; + let make_range = |start, end| { + IntRange( + // `unwrap()` is ok because we know the type is an integer. + IntRange::from_range(cx.tcx, start, end, pcx.ty, &RangeEnd::Included).unwrap(), + ) + }; + // This determines the set of all possible constructors for the type `pcx.ty`. For numbers, + // arrays and slices we use ranges and variable-length slices when appropriate. + // + // If the `exhaustive_patterns` feature is enabled, we make sure to omit constructors that + // are statically impossible. E.g., for `Option<!>`, we do not include `Some(_)` in the + // returned list of constructors. + // Invariant: this is empty if and only if the type is uninhabited (as determined by + // `cx.is_uninhabited()`). + let all_ctors = match pcx.ty.kind() { + ty::Bool => smallvec![make_range(0, 1)], + ty::Array(sub_ty, len) if len.try_eval_usize(cx.tcx, cx.param_env).is_some() => { + let len = len.eval_usize(cx.tcx, cx.param_env); + if len != 0 && cx.is_uninhabited(sub_ty) { + smallvec![] + } else { + smallvec![Slice(Slice::new(Some(len), VarLen(0, 0)))] + } + } + // Treat arrays of a constant but unknown length like slices. + ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { + let kind = if cx.is_uninhabited(sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; + smallvec![Slice(Slice::new(None, kind))] + } + ty::Adt(def, substs) if def.is_enum() => { + // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an + // additional "unknown" constructor. + // There is no point in enumerating all possible variants, because the user can't + // actually match against them all themselves. So we always return only the fictitious + // constructor. + // E.g., in an example like: + // + // ``` + // let err: io::ErrorKind = ...; + // match err { + // io::ErrorKind::NotFound => {}, + // } + // ``` + // + // we don't want to show every possible IO error, but instead have only `_` as the + // witness. + let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(pcx.ty); + + // If `exhaustive_patterns` is disabled and our scrutinee is an empty enum, we treat it + // as though it had an "unknown" constructor to avoid exposing its emptiness. The + // exception is if the pattern is at the top level, because we want empty matches to be + // considered exhaustive. + let is_secretly_empty = def.variants.is_empty() + && !cx.tcx.features().exhaustive_patterns + && !pcx.is_top_level; + + if is_secretly_empty || is_declared_nonexhaustive { + smallvec![NonExhaustive] + } else if cx.tcx.features().exhaustive_patterns { + // If `exhaustive_patterns` is enabled, we exclude variants known to be + // uninhabited. 
+ def.variants + .iter() + .filter(|v| { + !v.uninhabited_from(cx.tcx, substs, def.adt_kind(), cx.param_env) + .contains(cx.tcx, cx.module) + }) + .map(|v| Variant(v.def_id)) + .collect() + } else { + def.variants.iter().map(|v| Variant(v.def_id)).collect() + } + } + ty::Char => { + smallvec![ + // The valid Unicode Scalar Value ranges. + make_range('\u{0000}' as u128, '\u{D7FF}' as u128), + make_range('\u{E000}' as u128, '\u{10FFFF}' as u128), + ] + } + ty::Int(_) | ty::Uint(_) + if pcx.ty.is_ptr_sized_integral() + && !cx.tcx.features().precise_pointer_size_matching => + { + // `usize`/`isize` are not allowed to be matched exhaustively unless the + // `precise_pointer_size_matching` feature is enabled. So we treat those types like + // `#[non_exhaustive]` enums by returning a special unmatcheable constructor. + smallvec![NonExhaustive] + } + &ty::Int(ity) => { + let bits = Integer::from_attr(&cx.tcx, SignedInt(ity)).size().bits() as u128; + let min = 1u128 << (bits - 1); + let max = min - 1; + smallvec![make_range(min, max)] + } + &ty::Uint(uty) => { + let size = Integer::from_attr(&cx.tcx, UnsignedInt(uty)).size(); + let max = size.truncate(u128::MAX); + smallvec![make_range(0, max)] + } + // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot + // expose its emptiness. The exception is if the pattern is at the top level, because we + // want empty matches to be considered exhaustive. + ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => { + smallvec![NonExhaustive] + } + ty::Never => smallvec![], + _ if cx.is_uninhabited(pcx.ty) => smallvec![], + ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => smallvec![Single], + // This type is one for which we cannot list constructors, like `str` or `f64`. + _ => smallvec![NonExhaustive], + }; + SplitWildcard { matrix_ctors: Vec::new(), all_ctors } + } - MissingConstructors { all_ctors, used_ctors } + /// Pass a set of constructors relative to which to split this one. Don't call twice, it won't + /// do what you want. + pub(super) fn split<'a>( + &mut self, + pcx: PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator<Item = &'a Constructor<'tcx>> + Clone, + ) where + 'tcx: 'a, + { + // Since `all_ctors` never contains wildcards, this won't recurse further. + self.all_ctors = + self.all_ctors.iter().flat_map(|ctor| ctor.split(pcx, ctors.clone())).collect(); + self.matrix_ctors = ctors.filter(|c| !c.is_wildcard()).cloned().collect(); } - fn is_empty<'p>(&self, pcx: PatCtxt<'_, 'p, 'tcx>) -> bool { - self.iter(pcx).next().is_none() + /// Whether there are any value constructors for this type that are not present in the matrix. + fn any_missing(&self, pcx: PatCtxt<'_, '_, 'tcx>) -> bool { + self.iter_missing(pcx).next().is_some() } - /// Iterate over all_ctors \ used_ctors - fn iter<'a, 'p>( + /// Iterate over the constructors for this type that are not present in the matrix. + pub(super) fn iter_missing<'a, 'p>( &'a self, pcx: PatCtxt<'a, 'p, 'tcx>, ) -> impl Iterator<Item = &'a Constructor<'tcx>> + Captures<'p> { - self.all_ctors.iter().filter(move |ctor| !ctor.is_covered_by_any(pcx, &self.used_ctors)) + self.all_ctors.iter().filter(move |ctor| !ctor.is_covered_by_any(pcx, &self.matrix_ctors)) } - /// List the patterns corresponding to the missing constructors. In some cases, instead of - /// listing all constructors of a given type, we prefer to simply report a wildcard. 
- pub(super) fn report_patterns<'p>( - &self, - pcx: PatCtxt<'_, 'p, 'tcx>, - ) -> SmallVec<[Pat<'tcx>; 1]> { - // There are 2 ways we can report a witness here. - // Commonly, we can report all the "free" - // constructors as witnesses, e.g., if we have: - // - // ``` - // enum Direction { N, S, E, W } - // let Direction::N = ...; - // ``` - // - // we can report 3 witnesses: `S`, `E`, and `W`. - // - // However, there is a case where we don't want - // to do this and instead report a single `_` witness: - // if the user didn't actually specify a constructor - // in this arm, e.g., in - // - // ``` - // let x: (Direction, Direction, bool) = ...; - // let (_, _, false) = x; - // ``` - // - // we don't want to show all 16 possible witnesses - // `(<direction-1>, <direction-2>, true)` - we are - // satisfied with `(_, _, true)`. In this case, - // `used_ctors` is empty. - // The exception is: if we are at the top-level, for example in an empty match, we - // sometimes prefer reporting the list of constructors instead of just `_`. - let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); - if self.used_ctors.is_empty() && !report_when_all_missing { - // All constructors are unused. Report only a wildcard - // rather than each individual constructor. - smallvec![Pat::wildcard_from_ty(pcx.ty)] - } else { - // Construct for each missing constructor a "wild" version of this - // constructor, that matches everything that can be built with - // it. For example, if `ctor` is a `Constructor::Variant` for - // `Option::Some`, we get the pattern `Some(_)`. - self.iter(pcx) - .map(|missing_ctor| Fields::wildcards(pcx, &missing_ctor).apply(pcx, missing_ctor)) - .collect() + /// Return the set of constructors resulting from splitting the wildcard. As explained at the + /// top of the file, if any constructors are missing we can ignore the present ones. + fn into_ctors(self, pcx: PatCtxt<'_, '_, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { + if self.any_missing(pcx) { + // Some constructors are missing, thus we can specialize with the special `Missing` + // constructor, which stands for those constructors that are not seen in the matrix, + // and matches the same rows as any of them (namely the wildcard rows). See the top of + // the file for details. + // However, when all constructors are missing we can also specialize with the full + // `Wildcard` constructor. The difference will depend on what we want in diagnostics. + + // If some constructors are missing, we typically want to report those constructors, + // e.g.: + // ``` + // enum Direction { N, S, E, W } + // let Direction::N = ...; + // ``` + // we can report 3 witnesses: `S`, `E`, and `W`. + // + // However, if the user didn't actually specify a constructor + // in this arm, e.g., in + // ``` + // let x: (Direction, Direction, bool) = ...; + // let (_, _, false) = x; + // ``` + // we don't want to show all 16 possible witnesses `(<direction-1>, <direction-2>, + // true)` - we are satisfied with `(_, _, true)`. So if all constructors are missing we + // prefer to report just a wildcard `_`. + // + // The exception is: if we are at the top-level, for example in an empty match, we + // sometimes prefer reporting the list of constructors instead of just `_`. 
+ let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); + let ctor = if !self.matrix_ctors.is_empty() || report_when_all_missing { + Missing + } else { + Wildcard + }; + return smallvec![ctor]; } + + // All the constructors are present in the matrix, so we just go through them all. + self.all_ctors } } /// Some fields need to be explicitly hidden away in certain cases; see the comment above the -/// `Fields` struct. This struct represents such a potentially-hidden field. When a field is hidden -/// we still keep its type around. +/// `Fields` struct. This struct represents such a potentially-hidden field. #[derive(Debug, Copy, Clone)] pub(super) enum FilteredField<'p, 'tcx> { Kept(&'p Pat<'tcx>), - Hidden(Ty<'tcx>), + Hidden, } impl<'p, 'tcx> FilteredField<'p, 'tcx> { fn kept(self) -> Option<&'p Pat<'tcx>> { match self { FilteredField::Kept(p) => Some(p), - FilteredField::Hidden(_) => None, - } - } - - fn to_pattern(self) -> Pat<'tcx> { - match self { - FilteredField::Kept(p) => p.clone(), - FilteredField::Hidden(ty) => Pat::wildcard_from_ty(ty), + FilteredField::Hidden => None, } } } /// A value can be decomposed into a constructor applied to some fields. This struct represents /// those fields, generalized to allow patterns in each field. See also `Constructor`. +/// This is constructed from a constructor using [`Fields::wildcards()`]. /// /// If a private or `non_exhaustive` field is uninhabited, the code mustn't observe that it is -/// uninhabited. For that, we filter these fields out of the matrix. This is subtle because we -/// still need to have those fields back when going to/from a `Pat`. Most of this is handled -/// automatically in `Fields`, but when constructing or deconstructing `Fields` you need to be -/// careful. As a rule, when going to/from the matrix, use the filtered field list; when going -/// to/from `Pat`, use the full field list. -/// This filtering is uncommon in practice, because uninhabited fields are rarely used, so we avoid -/// it when possible to preserve performance. +/// uninhabited. For that, we filter these fields out of the matrix. This is handled automatically +/// in `Fields`. This filtering is uncommon in practice, because uninhabited fields are rarely used, +/// so we avoid it when possible to preserve performance. #[derive(Debug, Clone)] pub(super) enum Fields<'p, 'tcx> { /// Lists of patterns that don't contain any filtered fields. @@ -1027,21 +1090,19 @@ pub(super) enum Fields<'p, 'tcx> { /// have not measured if it really made a difference. Slice(&'p [Pat<'tcx>]), Vec(SmallVec<[&'p Pat<'tcx>; 2]>), - /// Patterns where some of the fields need to be hidden. `kept_count` caches the number of - /// non-hidden fields. + /// Patterns where some of the fields need to be hidden. For all intents and purposes we only + /// care about the non-hidden fields. We need to keep the real field index for those fields; + /// we're morally storing a `Vec<(usize, &Pat)>` but what we do is more convenient. + /// `len` counts the number of non-hidden fields Filtered { fields: SmallVec<[FilteredField<'p, 'tcx>; 2]>, - kept_count: usize, + len: usize, }, } impl<'p, 'tcx> Fields<'p, 'tcx> { - fn empty() -> Self { - Fields::Slice(&[]) - } - - /// Construct a new `Fields` from the given pattern. Must not be used if the pattern is a field - /// of a struct/tuple/variant. + /// Internal use. Use `Fields::wildcards()` instead. + /// Must not be used if the pattern is a field of a struct/tuple/variant. 
fn from_single_pattern(pat: &'p Pat<'tcx>) -> Self { Fields::Slice(std::slice::from_ref(pat)) } @@ -1086,7 +1147,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { if has_no_hidden_fields { Fields::wildcards_from_tys(cx, field_tys) } else { - let mut kept_count = 0; + let mut len = 0; let fields = variant .fields .iter() @@ -1101,14 +1162,14 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { // order not to reveal the uninhabitedness of the whole // variant. if is_uninhabited && (!is_visible || is_non_exhaustive) { - FilteredField::Hidden(ty) + FilteredField::Hidden } else { - kept_count += 1; + len += 1; FilteredField::Kept(wildcard_from_ty(ty)) } }) .collect(); - Fields::Filtered { fields, kept_count } + Fields::Filtered { fields, len } } } } @@ -1121,9 +1182,8 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { } _ => bug!("bad slice pattern {:?} {:?}", constructor, ty), }, - Str(..) | FloatRange(..) | IntRange(..) | NonExhaustive | Opaque | Wildcard => { - Fields::empty() - } + Str(..) | FloatRange(..) | IntRange(..) | NonExhaustive | Opaque | Missing + | Wildcard => Fields::Slice(&[]), }; debug!("Fields::wildcards({:?}, {:?}) = {:#?}", constructor, ty, ret); ret @@ -1145,14 +1205,16 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { /// `self`: `[false]` /// returns `Some(false)` pub(super) fn apply(self, pcx: PatCtxt<'_, 'p, 'tcx>, ctor: &Constructor<'tcx>) -> Pat<'tcx> { - let mut subpatterns = self.all_patterns(); + let subpatterns_and_indices = self.patterns_and_indices(); + let mut subpatterns = subpatterns_and_indices.iter().map(|&(_, p)| p).cloned(); let pat = match ctor { Single | Variant(_) => match pcx.ty.kind() { ty::Adt(..) | ty::Tuple(..) => { - let subpatterns = subpatterns - .enumerate() - .map(|(i, p)| FieldPat { field: Field::new(i), pattern: p }) + // We want the real indices here. + let subpatterns = subpatterns_and_indices + .iter() + .map(|&(field, p)| FieldPat { field, pattern: p.clone() }) .collect(); if let ty::Adt(adt, substs) = pcx.ty.kind() { @@ -1207,48 +1269,52 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { &FloatRange(lo, hi, end) => PatKind::Range(PatRange { lo, hi, end }), IntRange(range) => return range.to_pat(pcx.cx.tcx, pcx.ty), NonExhaustive => PatKind::Wild, + Wildcard => return Pat::wildcard_from_ty(pcx.ty), Opaque => bug!("we should not try to apply an opaque constructor"), - Wildcard => bug!( - "trying to apply a wildcard constructor; this should have been done in `apply_constructors`" + Missing => bug!( + "trying to apply the `Missing` constructor; this should have been done in `apply_constructors`" ), }; Pat { ty: pcx.ty, span: DUMMY_SP, kind: Box::new(pat) } } - /// Returns the number of patterns from the viewpoint of match-checking, i.e. excluding hidden - /// fields. This is what we want in most cases in this file, the only exception being - /// conversion to/from `Pat`. + /// Returns the number of patterns. This is the same as the arity of the constructor used to + /// construct `self`. pub(super) fn len(&self) -> usize { match self { Fields::Slice(pats) => pats.len(), Fields::Vec(pats) => pats.len(), - Fields::Filtered { kept_count, .. } => *kept_count, + Fields::Filtered { len, .. } => *len, } } - /// Returns the complete list of patterns, including hidden fields. - fn all_patterns(self) -> impl Iterator<Item = Pat<'tcx>> { - let pats: SmallVec<[_; 2]> = match self { - Fields::Slice(pats) => pats.iter().cloned().collect(), - Fields::Vec(pats) => pats.into_iter().cloned().collect(), + /// Returns the list of patterns along with the corresponding field indices. 
+ fn patterns_and_indices(&self) -> SmallVec<[(Field, &'p Pat<'tcx>); 2]> { + match self { + Fields::Slice(pats) => { + pats.iter().enumerate().map(|(i, p)| (Field::new(i), p)).collect() + } + Fields::Vec(pats) => { + pats.iter().copied().enumerate().map(|(i, p)| (Field::new(i), p)).collect() + } Fields::Filtered { fields, .. } => { - // We don't skip any fields here. - fields.into_iter().map(|p| p.to_pattern()).collect() + // Indices must be relative to the full list of patterns + fields + .iter() + .enumerate() + .filter_map(|(i, p)| Some((Field::new(i), p.kept()?))) + .collect() } - }; - pats.into_iter() + } } - /// Returns the filtered list of patterns, not including hidden fields. - pub(super) fn filtered_patterns(self) -> SmallVec<[&'p Pat<'tcx>; 2]> { + /// Returns the list of patterns. + pub(super) fn into_patterns(self) -> SmallVec<[&'p Pat<'tcx>; 2]> { match self { Fields::Slice(pats) => pats.iter().collect(), Fields::Vec(pats) => pats, - Fields::Filtered { fields, .. } => { - // We skip hidden fields here - fields.into_iter().filter_map(|p| p.kept()).collect() - } + Fields::Filtered { fields, .. } => fields.iter().filter_map(|p| p.kept()).collect(), } } @@ -1264,10 +1330,10 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { } /// Overrides some of the fields with the provided patterns. This is used when a pattern - /// defines some fields but not all, for example `Foo { field1: Some(_), .. }`: here we start with a - /// `Fields` that is just one wildcard per field of the `Foo` struct, and override the entry - /// corresponding to `field1` with the pattern `Some(_)`. This is also used for slice patterns - /// for the same reason. + /// defines some fields but not all, for example `Foo { field1: Some(_), .. }`: here we start + /// with a `Fields` that is just one wildcard per field of the `Foo` struct, and override the + /// entry corresponding to `field1` with the pattern `Some(_)`. This is also used for slice + /// patterns for the same reason. fn replace_fields_indexed( &self, new_pats: impl IntoIterator<Item = (usize, &'p Pat<'tcx>)>, @@ -1295,8 +1361,8 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { fields } - /// Replaces contained fields with the given filtered list of patterns, e.g. taken from the - /// matrix. There must be `len()` patterns in `pats`. + /// Replaces contained fields with the given list of patterns. There must be `len()` patterns + /// in `pats`. pub(super) fn replace_fields( &self, cx: &MatchCheckCtxt<'p, 'tcx>, @@ -1305,7 +1371,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { let pats: &[_] = cx.pattern_arena.alloc_from_iter(pats); match self { - Fields::Filtered { fields, kept_count } => { + Fields::Filtered { fields, len } => { let mut pats = pats.iter(); let mut fields = fields.clone(); for f in &mut fields { @@ -1314,7 +1380,7 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { *p = pats.next().unwrap(); } } - Fields::Filtered { fields, kept_count: *kept_count } + Fields::Filtered { fields, len: *len } } _ => Fields::Slice(pats), } diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 0ecc034ac0b..83fee380ccc 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -12,301 +12,278 @@ //! //! ----- //! -//! This file includes the logic for exhaustiveness and usefulness checking for -//! pattern-matching. Specifically, given a list of patterns for a type, we can -//! tell whether: -//! 
(a) the patterns cover every possible constructor for the type (exhaustiveness) -//! (b) each pattern is necessary (usefulness) +//! This file includes the logic for exhaustiveness and reachability checking for pattern-matching. +//! Specifically, given a list of patterns for a type, we can tell whether: +//! (a) each pattern is reachable (reachability) +//! (b) the patterns cover every possible value for the type (exhaustiveness) //! -//! The algorithm implemented here is a modified version of the one described in -//! [this paper](http://moscova.inria.fr/~maranget/papers/warn/index.html). -//! However, to save future implementors from reading the original paper, we -//! summarise the algorithm here to hopefully save time and be a little clearer -//! (without being so rigorous). +//! The algorithm implemented here is a modified version of the one described in [this +//! paper](http://moscova.inria.fr/~maranget/papers/warn/index.html). We have however generalized +//! it to accommodate the variety of patterns that Rust supports. We thus explain our version here, +//! without being as rigorous. //! -//! # Premise //! -//! The core of the algorithm revolves about a "usefulness" check. In particular, we -//! are trying to compute a predicate `U(P, p)` where `P` is a list of patterns (we refer to this as -//! a matrix). `U(P, p)` represents whether, given an existing list of patterns -//! `P_1 ..= P_m`, adding a new pattern `p` will be "useful" (that is, cover previously- -//! uncovered values of the type). +//! # Summary //! -//! If we have this predicate, then we can easily compute both exhaustiveness of an -//! entire set of patterns and the individual usefulness of each one. -//! (a) the set of patterns is exhaustive iff `U(P, _)` is false (i.e., adding a wildcard -//! match doesn't increase the number of values we're matching) -//! (b) a pattern `P_i` is not useful if `U(P[0..=(i-1), P_i)` is false (i.e., adding a -//! pattern to those that have come before it doesn't increase the number of values -//! we're matching). +//! The core of the algorithm is the notion of "usefulness". A pattern `q` is said to be *useful* +//! relative to another pattern `p` of the same type if there is a value that is matched by `q` and +//! not matched by `p`. This generalizes to many `p`s: `q` is useful w.r.t. a list of patterns +//! `p_1 .. p_n` if there is a value that is matched by `q` and by none of the `p_i`. We write +//! `usefulness(p_1 .. p_n, q)` for a function that returns a list of such values. The aim of this +//! file is to compute it efficiently. //! -//! # Core concept +//! This is enough to compute reachability: a pattern in a `match` expression is reachable iff it +//! is useful w.r.t. the patterns above it: +//! ```rust +//! match x { +//! Some(_) => ..., +//! None => ..., // reachable: `None` is matched by this but not the branch above +//! Some(0) => ..., // unreachable: all the values this matches are already matched by +//! // `Some(_)` above +//! } +//! ``` +//! +//! This is also enough to compute exhaustiveness: a match is exhaustive iff the wildcard `_` +//! pattern is _not_ useful w.r.t. the patterns in the match. The values returned by `usefulness` +//! are used to tell the user which values are missing. +//! ```rust +//! match x { +//! Some(0) => ..., +//! None => ..., +//! // not exhaustive: `_` is useful because it matches `Some(1)` +//! } +//! ``` //! -//! The idea that powers everything that is done in this file is the following: a value is made -//! 
from a constructor applied to some fields. Examples of constructors are `Some`, `None`, `(,)` -//! (the 2-tuple constructor), `Foo {..}` (the constructor for a struct `Foo`), and `2` (the -//! constructor for the number `2`). Fields are just a (possibly empty) list of values. +//! The entrypoint of this file is the [`compute_match_usefulness`] function, which computes +//! reachability for each match branch and exhaustiveness for the whole match. //! -//! Some of the constructors listed above might feel weird: `None` and `2` don't take any -//! arguments. This is part of what makes constructors so general: we will consider plain values -//! like numbers and string literals to be constructors that take no arguments, also called "0-ary -//! constructors"; they are the simplest case of constructors. This allows us to see any value as -//! made up from a tree of constructors, each having a given number of children. For example: -//! `(None, Ok(0))` is made from 4 different constructors. //! -//! This idea can be extended to patterns: a pattern captures a set of possible values, and we can -//! describe this set using constructors. For example, `Err(_)` captures all values of the type -//! `Result<T, E>` that start with the `Err` constructor (for some choice of `T` and `E`). The -//! wildcard `_` captures all values of the given type starting with any of the constructors for -//! that type. +//! # Constructors and fields //! -//! We use this to compute whether different patterns might capture a same value. Do the patterns -//! `Ok("foo")` and `Err(_)` capture a common value? The answer is no, because the first pattern -//! captures only values starting with the `Ok` constructor and the second only values starting -//! with the `Err` constructor. Do the patterns `Some(42)` and `Some(1..10)` intersect? They might, -//! since they both capture values starting with `Some`. To be certain, we need to dig under the -//! `Some` constructor and continue asking the question. This is the main idea behind the -//! exhaustiveness algorithm: by looking at patterns constructor-by-constructor, we can efficiently -//! figure out if some new pattern might capture a value that hadn't been captured by previous -//! patterns. +//! Note: we will often abbreviate "constructor" as "ctor". //! -//! Constructors are represented by the `Constructor` enum, and its fields by the `Fields` enum. -//! Most of the complexity of this file resides in transforming between patterns and -//! (`Constructor`, `Fields`) pairs, handling all the special cases correctly. +//! The idea that powers everything that is done in this file is the following: a (matcheable) +//! value is made from a constructor applied to a number of subvalues. Examples of constructors are +//! `Some`, `None`, `(,)` (the 2-tuple constructor), `Foo {..}` (the constructor for a struct +//! `Foo`), and `2` (the constructor for the number `2`). This is natural when we think of +//! pattern-matching, and this is the basis for what follows. //! -//! Caveat: this constructors/fields distinction doesn't quite cover every Rust value. For example -//! a value of type `Rc<u64>` doesn't fit this idea very well, nor do various other things. -//! However, this idea covers most of the cases that are relevant to exhaustiveness checking. +//! Some of the ctors listed above might feel weird: `None` and `2` don't take any arguments. +//! That's ok: those are ctors that take a list of 0 arguments; they are the simplest case of +//! ctors. 
We treat `2` as a ctor because `u64` and other number types behave exactly like a huge +//! `enum`, with one variant for each number. This allows us to see any matcheable value as made up +//! from a tree of ctors, each having a set number of children. For example: `Foo { bar: None, +//! baz: Ok(0) }` is made from 4 different ctors, namely `Foo{..}`, `None`, `Ok` and `0`. //! +//! This idea can be extended to patterns: they are also made from constructors applied to fields. +//! A pattern for a given type is allowed to use all the ctors for values of that type (which we +//! call "value constructors"), but there are also pattern-only ctors. The most important one is +//! the wildcard (`_`), and the others are integer ranges (`0..=10`), variable-length slices (`[x, +//! ..]`), and or-patterns (`Ok(0) | Err(_)`). Examples of valid patterns are `42`, `Some(_)`, `Foo +//! { bar: Some(0) | None, baz: _ }`. Note that a binder in a pattern (e.g. `Some(x)`) matches the +//! same values as a wildcard (e.g. `Some(_)`), so we treat both as wildcards. //! -//! # Algorithm +//! From this deconstruction we can compute whether a given value matches a given pattern; we +//! simply look at ctors one at a time. Given a pattern `p` and a value `v`, we want to compute +//! `matches!(v, p)`. It's mostly straightforward: we compare the head ctors and when they match +//! we compare their fields recursively. A few representative examples: //! -//! Recall that `U(P, p)` represents whether, given an existing list of patterns (aka matrix) `P`, -//! adding a new pattern `p` will cover previously-uncovered values of the type. -//! During the course of the algorithm, the rows of the matrix won't just be individual patterns, -//! but rather partially-deconstructed patterns in the form of a list of fields. The paper -//! calls those pattern-vectors, and we will call them pattern-stacks. The same holds for the -//! new pattern `p`. +//! - `matches!(v, _) := true` +//! - `matches!((v0, v1), (p0, p1)) := matches!(v0, p0) && matches!(v1, p1)` +//! - `matches!(Foo { bar: v0, baz: v1 }, Foo { bar: p0, baz: p1 }) := matches!(v0, p0) && matches!(v1, p1)` +//! - `matches!(Ok(v0), Ok(p0)) := matches!(v0, p0)` +//! - `matches!(Ok(v0), Err(p0)) := false` (incompatible variants) +//! - `matches!(v, 1..=100) := matches!(v, 1) || ... || matches!(v, 100)` +//! - `matches!([v0], [p0, .., p1]) := false` (incompatible lengths) +//! - `matches!([v0, v1, v2], [p0, .., p1]) := matches!(v0, p0) && matches!(v2, p1)` +//! - `matches!(v, p0 | p1) := matches!(v, p0) || matches!(v, p1)` //! -//! For example, say we have the following: +//! Constructors, fields and relevant operations are defined in the [`super::deconstruct_pat`] module. //! +//! Note: this constructors/fields distinction may not straightforwardly apply to every Rust type. +//! For example a value of type `Rc<u64>` can't be deconstructed that way, and `&str` has an +//! infinitude of constructors. There are also subtleties with visibility of fields and +//! uninhabitedness and various other things. The constructors idea can be extended to handle most +//! of these subtleties though; caveats are documented where relevant throughout the code. +//! +//! Whether constructors cover each other is computed by [`Constructor::is_covered_by`]. +//! +//! +//! # Specialization +//! +//! Recall that we wish to compute `usefulness(p_1 .. p_n, q)`: given a list of patterns `p_1 .. +//! p_n` and a pattern `q`, all of the same type, we want to find a list of values (called +//! 
"witnesses") that are matched by `q` and by none of the `p_i`. We obviously don't just +//! enumerate all possible values. From the discussion above we see that we can proceed +//! ctor-by-ctor: for each value ctor of the given type, we ask "is there a value that starts with +//! this constructor and matches `q` and none of the `p_i`?". As we saw above, there's a lot we can +//! say from knowing only the first constructor of our candidate value. +//! +//! Let's take the following example: //! ``` -//! // x: (Option<bool>, Result<()>) //! match x { -//! (Some(true), _) => {} -//! (None, Err(())) => {} -//! (None, Err(_)) => {} +//! Enum::Variant1(_) => {} // `p1` +//! Enum::Variant2(None, 0) => {} // `p2` +//! Enum::Variant2(Some(_), 0) => {} // `q` //! } //! ``` //! -//! Here, the matrix `P` starts as: +//! We can easily see that if our candidate value `v` starts with `Variant1` it will not match `q`. +//! If `v = Variant2(v0, v1)` however, whether or not it matches `p2` and `q` will depend on `v0` +//! and `v1`. In fact, such a `v` will be a witness of usefulness of `q` exactly when the tuple +//! `(v0, v1)` is a witness of usefulness of `q'` in the following reduced match: //! //! ``` -//! [ -//! [(Some(true), _)], -//! [(None, Err(()))], -//! [(None, Err(_))], -//! ] +//! match x { +//! (None, 0) => {} // `p2'` +//! (Some(_), 0) => {} // `q'` +//! } //! ``` //! -//! We can tell it's not exhaustive, because `U(P, _)` is true (we're not covering -//! `[(Some(false), _)]`, for instance). In addition, row 3 is not useful, because -//! all the values it covers are already covered by row 2. +//! This motivates a new step in computing usefulness, that we call _specialization_. +//! Specialization consist of filtering a list of patterns for those that match a constructor, and +//! then looking into the constructor's fields. This enables usefulness to be computed recursively. //! -//! A list of patterns can be thought of as a stack, because we are mainly interested in the top of -//! the stack at any given point, and we can pop or apply constructors to get new pattern-stacks. -//! To match the paper, the top of the stack is at the beginning / on the left. +//! Instead of acting on a single pattern in each row, we will consider a list of patterns for each +//! row, and we call such a list a _pattern-stack_. The idea is that we will specialize the +//! leftmost pattern, which amounts to popping the constructor and pushing its fields, which feels +//! like a stack. We note a pattern-stack simply with `[p_1 ... p_n]`. +//! Here's a sequence of specializations of a list of pattern-stacks, to illustrate what's +//! happening: +//! ``` +//! [Enum::Variant1(_)] +//! [Enum::Variant2(None, 0)] +//! [Enum::Variant2(Some(_), 0)] +//! //==>> specialize with `Variant2` +//! [None, 0] +//! [Some(_), 0] +//! //==>> specialize with `Some` +//! [_, 0] +//! //==>> specialize with `true` (say the type was `bool`) +//! [0] +//! //==>> specialize with `0` +//! [] +//! ``` //! -//! There are two important operations on pattern-stacks necessary to understand the algorithm: +//! The function `specialize(c, p)` takes a value constructor `c` and a pattern `p`, and returns 0 +//! or more pattern-stacks. If `c` does not match the head constructor of `p`, it returns nothing; +//! otherwise if returns the fields of the constructor. This only returns more than one +//! pattern-stack if `p` has a pattern-only constructor. //! -//! 1. We can pop a given constructor off the top of a stack. This operation is called -//! 
`specialize`, and is denoted `S(c, p)` where `c` is a constructor (like `Some` or -//! `None`) and `p` a pattern-stack. -//! If the pattern on top of the stack can cover `c`, this removes the constructor and -//! pushes its arguments onto the stack. It also expands OR-patterns into distinct patterns. -//! Otherwise the pattern-stack is discarded. -//! This essentially filters those pattern-stacks whose top covers the constructor `c` and -//! discards the others. +//! - Specializing for the wrong constructor returns nothing //! -//! For example, the first pattern above initially gives a stack `[(Some(true), _)]`. If we -//! pop the tuple constructor, we are left with `[Some(true), _]`, and if we then pop the -//! `Some` constructor we get `[true, _]`. If we had popped `None` instead, we would get -//! nothing back. +//! `specialize(None, Some(p0)) := []` //! -//! This returns zero or more new pattern-stacks, as follows. We look at the pattern `p_1` -//! on top of the stack, and we have four cases: +//! - Specializing for the correct constructor returns a single row with the fields //! -//! 1.1. `p_1 = c(r_1, .., r_a)`, i.e. the top of the stack has constructor `c`. We -//! push onto the stack the arguments of this constructor, and return the result: -//! `r_1, .., r_a, p_2, .., p_n` +//! `specialize(Variant1, Variant1(p0, p1, p2)) := [[p0, p1, p2]]` //! -//! 1.2. `p_1 = c'(r_1, .., r_a')` where `c ≠c'`. We discard the current stack and -//! return nothing. +//! `specialize(Foo{..}, Foo { bar: p0, baz: p1 }) := [[p0, p1]]` //! -//! 1.3. `p_1 = _`. We push onto the stack as many wildcards as the constructor `c` has -//! arguments (its arity), and return the resulting stack: -//! `_, .., _, p_2, .., p_n` +//! - For or-patterns, we specialize each branch and concatenate the results //! -//! 1.4. `p_1 = r_1 | r_2`. We expand the OR-pattern and then recurse on each resulting -//! stack: -//! - `S(c, (r_1, p_2, .., p_n))` -//! - `S(c, (r_2, p_2, .., p_n))` +//! `specialize(c, p0 | p1) := specialize(c, p0) ++ specialize(c, p1)` //! -//! 2. We can pop a wildcard off the top of the stack. This is called `S(_, p)`, where `p` is -//! a pattern-stack. Note: the paper calls this `D(p)`. -//! This is used when we know there are missing constructor cases, but there might be -//! existing wildcard patterns, so to check the usefulness of the matrix, we have to check -//! all its *other* components. +//! - We treat the other pattern constructors as if they were a large or-pattern of all the +//! possibilities: //! -//! It is computed as follows. We look at the pattern `p_1` on top of the stack, -//! and we have three cases: -//! 2.1. `p_1 = c(r_1, .., r_a)`. We discard the current stack and return nothing. -//! 2.2. `p_1 = _`. We return the rest of the stack: -//! p_2, .., p_n -//! 2.3. `p_1 = r_1 | r_2`. We expand the OR-pattern and then recurse on each resulting -//! stack. -//! - `S(_, (r_1, p_2, .., p_n))` -//! - `S(_, (r_2, p_2, .., p_n))` +//! `specialize(c, _) := specialize(c, Variant1(_) | Variant2(_, _) | ...)` //! -//! Note that the OR-patterns are not always used directly in Rust, but are used to derive the -//! exhaustive integer matching rules, so they're written here for posterity. +//! `specialize(c, 1..=100) := specialize(c, 1 | ... | 100)` //! -//! Both those operations extend straightforwardly to a list or pattern-stacks, i.e. a matrix, by -//! working row-by-row. Popping a constructor ends up keeping only the matrix rows that start with -//! 
the given constructor, and popping a wildcard keeps those rows that start with a wildcard.
+//!
+//! - If `c` is a pattern-only constructor, `specialize` is defined on a case-by-case basis. See
+//! the discussion about constructor splitting in [`super::deconstruct_pat`].
+//!
+//!
-//! The algorithm for computing `U`
-//! -------------------------------
-//! The algorithm is inductive (on the number of columns: i.e., components of tuple patterns).
-//! That means we're going to check the components from left-to-right, so the algorithm
-//! operates principally on the first component of the matrix and new pattern-stack `p`.
-//! This algorithm is realised in the `is_useful` function.
//!
-//! Base case. (`n = 0`, i.e., an empty tuple pattern)
-//! - If `P` already contains an empty pattern (i.e., if the number of patterns `m > 0`),
-//! then `U(P, p)` is false.
-//! - Otherwise, `P` must be empty, so `U(P, p)` is true.
+//! We then extend this function to work with pattern-stacks as input, by acting on the first
+//! column and keeping the other columns untouched.
//!
-//! Inductive step. (`n > 0`, i.e., whether there's at least one column
-//! [which may then be expanded into further columns later])
-//! We're going to match on the top of the new pattern-stack, `p_1`.
-//! - If `p_1 == c(r_1, .., r_a)`, i.e. we have a constructor pattern.
-//! Then, the usefulness of `p_1` can be reduced to whether it is useful when
-//! we ignore all the patterns in the first column of `P` that involve other constructors.
-//! This is where `S(c, P)` comes in:
-//! `U(P, p) := U(S(c, P), S(c, p))`
+//! Specialization for the whole matrix is done in [`Matrix::specialize_constructor`]. Note that
+//! or-patterns in the first column are expanded before being stored in the matrix. Specialization
+//! for a single patstack is done via a combination of [`Constructor::is_covered_by`] and
+//! [`PatStack::pop_head_constructor`]. The internals of how it's done mostly live in the
+//! [`Fields`] struct.
//!
-//! For example, if `P` is:
+//!
+//! # Computing usefulness
//!
-//! ```
-//! [
-//! [Some(true), _],
-//! [None, 0],
-//! ]
-//! ```
+//! We now have all we need to compute usefulness. The inputs to usefulness are a list of
+//! pattern-stacks `p_1 ... p_n` (one per row), and a new pattern-stack `q`. The paper and this
+//! file call the list of patstacks a _matrix_. They must all have the same number of columns and
+//! the patterns in a given column must all have the same type. `usefulness` returns a (possibly
+//! empty) list of witnesses of usefulness. These witnesses will also be pattern-stacks.
//!
-//! and `p` is `[Some(false), 0]`, then we don't care about row 2 since we know `p` only
-//! matches values that row 2 doesn't. For row 1 however, we need to dig into the
-//! arguments of `Some` to know whether some new value is covered. So we compute
-//! `U([[true, _]], [false, 0])`.
+//! - base case: `n_columns == 0`.
+//! Since a pattern-stack functions like a tuple of patterns, an empty one functions like the
+//! unit type. Thus `q` is useful iff there are no rows above it, i.e. if `n == 0`.
//!
-//! - If `p_1 == _`, then we look at the list of constructors that appear in the first
-//! component of the rows of `P`:
-//! + If there are some constructors that aren't present, then we might think that the
-//! wildcard `_` is useful, since it covers those constructors that weren't covered
-//! before.
-//! That's almost correct, but only works if there were no wildcards in those first
-//! components. So we need to check that `p` is useful with respect to the rows that
-//! start with a wildcard, if there are any.
This is where `S(_, x)` comes in: -//! `U(P, p) := U(S(_, P), S(_, p))` +//! - base case: `n_columns == 0`. +//! Since a pattern-stack functions like a tuple of patterns, an empty one functions like the +//! unit type. Thus `q` is useful iff there are no rows above it, i.e. if `n == 0`. //! -//! For example, if `P` is: +//! - inductive case: `n_columns > 0`. +//! We need a way to list the constructors we want to try. We will be more clever in the next +//! section but for now assume we list all value constructors for the type of the first column. //! -//! ``` -//! [ -//! [_, true, _], -//! [None, false, 1], -//! ] -//! ``` +//! - for each such ctor `c`: +//! +//! - for each `q'` returned by `specialize(c, q)`: //! -//! and `p` is `[_, false, _]`, the `Some` constructor doesn't appear in `P`. So if we -//! only had row 2, we'd know that `p` is useful. However row 1 starts with a -//! wildcard, so we need to check whether `U([[true, _]], [false, 1])`. +//! - we compute `usefulness(specialize(c, p_1) ... specialize(c, p_n), q')` //! -//! + Otherwise, all possible constructors (for the relevant type) are present. In this -//! case we must check whether the wildcard pattern covers any unmatched value. For -//! that, we can think of the `_` pattern as a big OR-pattern that covers all -//! possible constructors. For `Option`, that would mean `_ = None | Some(_)` for -//! example. The wildcard pattern is useful in this case if it is useful when -//! specialized to one of the possible constructors. So we compute: -//! `U(P, p) := ∃(k ϵ constructors) U(S(k, P), S(k, p))` +//! - for each witness found, we revert specialization by pushing the constructor `c` on top. //! -//! For example, if `P` is: +//! - We return the concatenation of all the witnesses found, if any. //! +//! Example: //! ``` -//! [ -//! [Some(true), _], -//! [None, false], -//! ] +//! [Some(true)] // p_1 +//! [None] // p_2 +//! [Some(_)] // q +//! //==>> try `None`: `specialize(None, q)` returns nothing +//! //==>> try `Some`: `specialize(Some, q)` returns a single row +//! [true] // p_1' +//! [_] // q' +//! //==>> try `true`: `specialize(true, q')` returns a single row +//! [] // p_1'' +//! [] // q'' +//! //==>> base case; `n != 0` so `q''` is not useful. +//! //==>> go back up a step +//! [true] // p_1' +//! [_] // q' +//! //==>> try `false`: `specialize(false, q')` returns a single row +//! [] // q'' +//! //==>> base case; `n == 0` so `q''` is useful. We return the single witness `[]` +//! witnesses: +//! [] +//! //==>> undo the specialization with `false` +//! witnesses: +//! [false] +//! //==>> undo the specialization with `Some` +//! witnesses: +//! [Some(false)] +//! //==>> we have tried all the constructors. The output is the single witness `[Some(false)]`. //! ``` //! -//! and `p` is `[_, false]`, both `None` and `Some` constructors appear in the first -//! components of `P`. We will therefore try popping both constructors in turn: we -//! compute `U([[true, _]], [_, false])` for the `Some` constructor, and `U([[false]], -//! [false])` for the `None` constructor. The first case returns true, so we know that -//! `p` is useful for `P`. Indeed, it matches `[Some(false), _]` that wasn't matched -//! before. +//! This computation is done in [`is_useful`]. In practice we don't care about the list of +//! witnesses when computing reachability; we only need to know whether any exist. We do keep the +//! witnesses when computing exhaustiveness to report them to the user. +//! //! -//! 
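The recursion just described fits in a few lines once the type is small. The following self-contained sketch (a toy `Pat` type over `bool` only; or-patterns and witness collection are omitted, so this is a model of the algorithm, not the compiler's `is_useful`) computes bare usefulness for stacks of boolean patterns:

```rust
/// Toy patterns over `bool`: a literal or a wildcard. Or-patterns are omitted.
#[derive(Clone, Copy)]
enum Pat {
    Lit(bool),
    Wild,
}

/// Does pattern `p` cover the constructor `c`? Boolean ctors have no fields,
/// so specializing a covering pattern just pops it off the stack.
fn covers(c: bool, p: Pat) -> bool {
    match p {
        Pat::Wild => true,
        Pat::Lit(b) => b == c,
    }
}

/// Is there a value matched by `q` and by none of the rows?
fn is_useful(rows: &[Vec<Pat>], q: &[Pat]) -> bool {
    // Base case: no columns left. `q` is useful iff no row survived.
    if q.is_empty() {
        return rows.is_empty();
    }
    // The constructors to try: the head of `q` if it is a value ctor, or
    // every ctor of the type (here just `false` and `true`) for a wildcard.
    let ctors = match q[0] {
        Pat::Lit(b) => vec![b],
        Pat::Wild => vec![false, true],
    };
    // `q` is useful iff specializing with some constructor keeps it useful.
    ctors.into_iter().any(|c| {
        let spec_rows: Vec<Vec<Pat>> = rows
            .iter()
            .filter(|row| covers(c, row[0]))
            .map(|row| row[1..].to_vec())
            .collect();
        is_useful(&spec_rows, &q[1..])
    })
}

fn main() {
    use Pat::*;
    // match (x, y) { (true, _) => {}, (false, true) => {} }
    let rows = vec![vec![Lit(true), Wild], vec![Lit(false), Lit(true)]];
    // The wildcard row `[_, _]` is useful: `(false, false)` is unmatched,
    // so the match above is not exhaustive.
    assert!(is_useful(&rows, &[Wild, Wild]));
    // `(true, true)` is covered by the first row, hence unreachable.
    assert!(!is_useful(&rows, &[Lit(true), Lit(true)]));
}
```

The naive `vec![false, true]` enumeration is exactly what stops scaling for types like `u64`; the constructor splitting described next replaces it.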
- If `p_1 == r_1 | r_2`, then the usefulness depends on each `r_i` separately: -//! `U(P, p) := U(P, (r_1, p_2, .., p_n)) -//! || U(P, (r_2, p_2, .., p_n))` +//! # Making usefulness tractable: constructor splitting //! -//! Modifications to the algorithm -//! ------------------------------ -//! The algorithm in the paper doesn't cover some of the special cases that arise in Rust, for -//! example uninhabited types and variable-length slice patterns. These are drawn attention to -//! throughout the code below. I'll make a quick note here about how exhaustive integer matching is -//! accounted for, though. +//! We're missing one last detail: which constructors do we list? Naively listing all value +//! constructors cannot work for types like `u64` or `&str`, so we need to be more clever. The +//! first obvious insight is that we only want to list constructors that are covered by the head +//! constructor of `q`. If it's a value constructor, we only try that one. If it's a pattern-only +//! constructor, we use the final clever idea for this algorithm: _constructor splitting_, where we +//! group together constructors that behave the same. //! -//! Exhaustive integer matching -//! --------------------------- -//! An integer type can be thought of as a (huge) sum type: 1 | 2 | 3 | ... -//! So to support exhaustive integer matching, we can make use of the logic in the paper for -//! OR-patterns. However, we obviously can't just treat ranges x..=y as individual sums, because -//! they are likely gigantic. So we instead treat ranges as constructors of the integers. This means -//! that we have a constructor *of* constructors (the integers themselves). We then need to work -//! through all the inductive step rules above, deriving how the ranges would be treated as -//! OR-patterns, and making sure that they're treated in the same way even when they're ranges. -//! There are really only four special cases here: -//! - When we match on a constructor that's actually a range, we have to treat it as if we would -//! an OR-pattern. -//! + It turns out that we can simply extend the case for single-value patterns in -//! `specialize` to either be *equal* to a value constructor, or *contained within* a range -//! constructor. -//! + When the pattern itself is a range, you just want to tell whether any of the values in -//! the pattern range coincide with values in the constructor range, which is precisely -//! intersection. -//! Since when encountering a range pattern for a value constructor, we also use inclusion, it -//! means that whenever the constructor is a value/range and the pattern is also a value/range, -//! we can simply use intersection to test usefulness. -//! - When we're testing for usefulness of a pattern and the pattern's first component is a -//! wildcard. -//! + If all the constructors appear in the matrix, we have a slight complication. By default, -//! the behaviour (i.e., a disjunction over specialised matrices for each constructor) is -//! invalid, because we want a disjunction over every *integer* in each range, not just a -//! disjunction over every range. This is a bit more tricky to deal with: essentially we need -//! to form equivalence classes of subranges of the constructor range for which the behaviour -//! of the matrix `P` and new pattern `p` are the same. This is described in more -//! detail in `Constructor::split`. -//! + If some constructors are missing from the matrix, it turns out we don't need to do -//! 
anything special (because we know none of the integers are actually wildcards: i.e., we -//! can't span wildcards using ranges). +//! The details are not necessary to understand this file, so we explain them in +//! [`super::deconstruct_pat`]. Splitting is done by the [`Constructor::split`] function. use self::Usefulness::*; use self::WitnessPreference::*; -use super::deconstruct_pat::{Constructor, Fields, MissingConstructors}; +use super::deconstruct_pat::{Constructor, Fields, SplitWildcard}; use super::{Pat, PatKind}; use super::{PatternFoldable, PatternFolder}; @@ -358,8 +335,6 @@ impl<'a, 'tcx> MatchCheckCtxt<'a, 'tcx> { #[derive(Copy, Clone)] pub(super) struct PatCtxt<'a, 'p, 'tcx> { pub(super) cx: &'a MatchCheckCtxt<'p, 'tcx>, - /// Current state of the matrix. - pub(super) matrix: &'a Matrix<'p, 'tcx>, /// Type of the current column under investigation. pub(super) ty: Ty<'tcx>, /// Span of the current pattern under investigation. @@ -473,7 +448,7 @@ impl<'p, 'tcx> PatStack<'p, 'tcx> { // We pop the head pattern and push the new fields extracted from the arguments of // `self.head()`. let mut new_fields = - ctor_wild_subpatterns.replace_with_pattern_arguments(self.head()).filtered_patterns(); + ctor_wild_subpatterns.replace_with_pattern_arguments(self.head()).into_patterns(); new_fields.extend_from_slice(&self.pats[1..]); PatStack::from_vec(new_fields) } @@ -538,7 +513,7 @@ impl<'p, 'tcx> Matrix<'p, 'tcx> { pub(super) fn head_ctors<'a>( &'a self, cx: &'a MatchCheckCtxt<'p, 'tcx>, - ) -> impl Iterator<Item = &'a Constructor<'tcx>> + Captures<'p> { + ) -> impl Iterator<Item = &'a Constructor<'tcx>> + Captures<'p> + Clone { self.patterns.iter().map(move |r| r.head_ctor(cx)) } @@ -804,14 +779,25 @@ impl<'tcx> Usefulness<'tcx> { fn apply_constructor<'p>( self, pcx: PatCtxt<'_, 'p, 'tcx>, + matrix: &Matrix<'p, 'tcx>, // used to compute missing ctors ctor: &Constructor<'tcx>, ctor_wild_subpatterns: &Fields<'p, 'tcx>, ) -> Self { match self { UsefulWithWitness(witnesses) => { - let new_witnesses = if ctor.is_wildcard() { - let missing_ctors = MissingConstructors::new(pcx); - let new_patterns = missing_ctors.report_patterns(pcx); + let new_witnesses = if matches!(ctor, Constructor::Missing) { + let mut split_wildcard = SplitWildcard::new(pcx); + split_wildcard.split(pcx, matrix.head_ctors(pcx.cx)); + // Construct for each missing constructor a "wild" version of this + // constructor, that matches everything that can be built with + // it. For example, if `ctor` is a `Constructor::Variant` for + // `Option::Some`, we get the pattern `Some(_)`. + let new_patterns: Vec<_> = split_wildcard + .iter_missing(pcx) + .map(|missing_ctor| { + Fields::wildcards(pcx, missing_ctor).apply(pcx, missing_ctor) + }) + .collect(); witnesses .into_iter() .flat_map(|witness| { @@ -866,10 +852,10 @@ enum WitnessPreference { /// We'll perform the following steps: /// 1. Start with an empty witness /// `Witness(vec![])` -/// 2. Push a witness `Some(_)` against the `None` -/// `Witness(vec![Some(_)])` -/// 3. Push a witness `true` against the `false` -/// `Witness(vec![Some(_), true])` +/// 2. Push a witness `true` against the `false` +/// `Witness(vec![true])` +/// 3. Push a witness `Some(_)` against the `None` +/// `Witness(vec![true, Some(_)])` /// 4. Apply the `Pair` constructor to the witnesses /// `Witness(vec![Pair(Some(_), true)])` /// @@ -967,7 +953,7 @@ fn is_useful<'p, 'tcx>( // FIXME(Nadrieril): Hack to work around type normalization issues (see #72476). 
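+ // (An empty matrix has no row to take the type from, hence the fallback
+ // below to the type of `v`'s own head pattern.)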
let ty = matrix.heads().next().map(|r| r.ty).unwrap_or(v.head().ty); - let pcx = PatCtxt { cx, matrix, ty, span: v.head().span, is_top_level }; + let pcx = PatCtxt { cx, ty, span: v.head().span, is_top_level }; debug!("is_useful_expand_first_col: ty={:#?}, expanding {:#?}", pcx.ty, v.head()); @@ -991,18 +977,30 @@ fn is_useful<'p, 'tcx>( }); Usefulness::merge(usefulnesses) } else { + let v_ctor = v.head_ctor(cx); + if let Constructor::IntRange(ctor_range) = &v_ctor { + // Lint on likely incorrect range patterns (#63987) + ctor_range.lint_overlapping_range_endpoints( + pcx, + matrix.head_ctors_and_spans(cx), + matrix.column_count().unwrap_or(0), + hir_id, + ) + } // We split the head constructor of `v`. - let ctors = v.head_ctor(cx).split(pcx, Some(hir_id)); + let split_ctors = v_ctor.split(pcx, matrix.head_ctors(cx)); // For each constructor, we compute whether there's a value that starts with it that would // witness the usefulness of `v`. - let usefulnesses = ctors.into_iter().map(|ctor| { + let start_matrix = &matrix; + let usefulnesses = split_ctors.into_iter().map(|ctor| { // We cache the result of `Fields::wildcards` because it is used a lot. let ctor_wild_subpatterns = Fields::wildcards(pcx, &ctor); - let matrix = pcx.matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns); + let spec_matrix = + start_matrix.specialize_constructor(pcx, &ctor, &ctor_wild_subpatterns); let v = v.pop_head_constructor(&ctor_wild_subpatterns); let usefulness = - is_useful(pcx.cx, &matrix, &v, witness_preference, hir_id, is_under_guard, false); - usefulness.apply_constructor(pcx, &ctor, &ctor_wild_subpatterns) + is_useful(cx, &spec_matrix, &v, witness_preference, hir_id, is_under_guard, false); + usefulness.apply_constructor(pcx, start_matrix, &ctor, &ctor_wild_subpatterns) }); Usefulness::merge(usefulnesses) }; @@ -1013,7 +1011,7 @@ fn is_useful<'p, 'tcx>( /// The arm of a match expression. #[derive(Clone, Copy)] crate struct MatchArm<'p, 'tcx> { - /// The pattern must have been lowered through `MatchVisitor::lower_pattern`. + /// The pattern must have been lowered through `check_match::MatchVisitor::lower_pattern`. crate pat: &'p super::Pat<'tcx>, crate hir_id: HirId, crate has_guard: bool, @@ -1031,7 +1029,8 @@ crate struct UsefulnessReport<'p, 'tcx> { /// The entrypoint for the usefulness algorithm. Computes whether a match is exhaustive and which /// of its arms are reachable. /// -/// Note: the input patterns must have been lowered through `MatchVisitor::lower_pattern`. +/// Note: the input patterns must have been lowered through +/// `check_match::MatchVisitor::lower_pattern`. crate fn compute_match_usefulness<'p, 'tcx>( cx: &MatchCheckCtxt<'p, 'tcx>, arms: &[MatchArm<'p, 'tcx>], diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index 350a372a684..98c7b9a63a5 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -1912,4 +1912,22 @@ impl<'a> Parser<'a> { *self = snapshot; Err(err) } + + /// Get the diagnostics for the cases where `move async` is found. 
+ /// + /// `move_async_span` starts at the 'm' of the move keyword and ends with the 'c' of the async keyword + pub(super) fn incorrect_move_async_order_found( + &self, + move_async_span: Span, + ) -> DiagnosticBuilder<'a> { + let mut err = + self.struct_span_err(move_async_span, "the order of `move` and `async` is incorrect"); + err.span_suggestion_verbose( + move_async_span, + "try switching the order", + "async move".to_owned(), + Applicability::MaybeIncorrect, + ); + err + } } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index eed3e9947b2..b147f42fada 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1603,7 +1603,7 @@ impl<'a> Parser<'a> { self.sess.gated_spans.gate(sym::async_closure, span); } - let capture_clause = self.parse_capture_clause(); + let capture_clause = self.parse_capture_clause()?; let decl = self.parse_fn_block_decl()?; let decl_hi = self.prev_token.span; let body = match decl.output { @@ -1626,8 +1626,18 @@ impl<'a> Parser<'a> { } /// Parses an optional `move` prefix to a closure-like construct. - fn parse_capture_clause(&mut self) -> CaptureBy { - if self.eat_keyword(kw::Move) { CaptureBy::Value } else { CaptureBy::Ref } + fn parse_capture_clause(&mut self) -> PResult<'a, CaptureBy> { + if self.eat_keyword(kw::Move) { + // Check for `move async` and recover + if self.check_keyword(kw::Async) { + let move_async_span = self.token.span.with_lo(self.prev_token.span.data().lo); + Err(self.incorrect_move_async_order_found(move_async_span)) + } else { + Ok(CaptureBy::Value) + } + } else { + Ok(CaptureBy::Ref) + } } /// Parses the `|arg, arg|` header of a closure. @@ -2019,7 +2029,7 @@ impl<'a> Parser<'a> { fn parse_async_block(&mut self, mut attrs: AttrVec) -> PResult<'a, P<Expr>> { let lo = self.token.span; self.expect_keyword(kw::Async)?; - let capture_clause = self.parse_capture_clause(); + let capture_clause = self.parse_capture_clause()?; let (iattrs, body) = self.parse_inner_attrs_and_block()?; attrs.extend(iattrs); let kind = ExprKind::Async(capture_clause, DUMMY_NODE_ID, body); diff --git a/compiler/rustc_query_system/src/dep_graph/dep_node.rs b/compiler/rustc_query_system/src/dep_graph/dep_node.rs index 09e5dc857a7..ff52fdab19c 100644 --- a/compiler/rustc_query_system/src/dep_graph/dep_node.rs +++ b/compiler/rustc_query_system/src/dep_graph/dep_node.rs @@ -60,9 +60,8 @@ pub struct DepNode<K> { // * When a `DepNode::construct` is called, `arg.to_fingerprint()` // is responsible for calling `OnDiskCache::store_foreign_def_id_hash` // if needed - // * When a `DepNode` is loaded from the `PreviousDepGraph`, - // then `PreviousDepGraph::index_to_node` is responsible for calling - // `tcx.register_reused_dep_path_hash` + // * When we serialize the on-disk cache, `OnDiskCache::serialize` is + // responsible for calling `DepGraph::register_reused_dep_nodes`. // // FIXME: Enforce this by preventing manual construction of `DefNode` // (e.g. 
add a `_priv: ()` field) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 956d476d973..605d7ae4af6 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -3,7 +3,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::profiling::QueryInvocationId; use rustc_data_structures::sharded::{self, Sharded}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; -use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering}; +use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, LockGuard, Lrc, Ordering}; use rustc_data_structures::unlikely; use rustc_errors::Diagnostic; use rustc_index::vec::{Idx, IndexVec}; @@ -15,6 +15,7 @@ use std::env; use std::hash::Hash; use std::marker::PhantomData; use std::mem; +use std::ops::Range; use std::sync::atomic::Ordering::Relaxed; use super::debug::EdgeFilter; @@ -68,7 +69,7 @@ struct DepGraphData<K: DepKind> { /// The new encoding of the dependency graph, optimized for red/green /// tracking. The `current` field is the dependency graph of only the /// current compilation session: We don't merge the previous dep-graph into - /// current one anymore. + /// current one anymore, but we do reference shared data to save space. current: CurrentDepGraph<K>, /// The dep-graph from the previous compilation session. It contains all @@ -134,17 +135,61 @@ impl<K: DepKind> DepGraph<K> { } pub fn query(&self) -> DepGraphQuery<K> { - let data = self.data.as_ref().unwrap().current.data.lock(); - let nodes: Vec<_> = data.iter().map(|n| n.node).collect(); - let mut edges = Vec::new(); - for (from, edge_targets) in data.iter().map(|d| (d.node, &d.edges)) { - for &edge_target in edge_targets.iter() { - let to = data[edge_target].node; - edges.push((from, to)); + let data = self.data.as_ref().unwrap(); + let previous = &data.previous; + + // Note locking order: `prev_index_to_index`, then `data`. + let prev_index_to_index = data.current.prev_index_to_index.lock(); + let data = data.current.data.lock(); + let node_count = data.hybrid_indices.len(); + let edge_count = self.edge_count(&data); + + let mut nodes = Vec::with_capacity(node_count); + let mut edge_list_indices = Vec::with_capacity(node_count); + let mut edge_list_data = Vec::with_capacity(edge_count); + + // See `serialize` for notes on the approach used here. 
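+ //
+ // In short: each hybrid index below tags which collection (new, red, light
+ // green, or dark green) holds the node's data. Dark green nodes keep their
+ // edges in the previous graph, so those edges are translated through
+ // `prev_index_to_index` and appended to `edge_list_data` on the fly.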
+ + edge_list_data.extend(data.unshared_edges.iter().map(|i| i.index())); + + for &hybrid_index in data.hybrid_indices.iter() { + match hybrid_index.into() { + HybridIndex::New(new_index) => { + nodes.push(data.new.nodes[new_index]); + let edges = &data.new.edges[new_index]; + edge_list_indices.push((edges.start.index(), edges.end.index())); + } + HybridIndex::Red(red_index) => { + nodes.push(previous.index_to_node(data.red.node_indices[red_index])); + let edges = &data.red.edges[red_index]; + edge_list_indices.push((edges.start.index(), edges.end.index())); + } + HybridIndex::LightGreen(lg_index) => { + nodes.push(previous.index_to_node(data.light_green.node_indices[lg_index])); + let edges = &data.light_green.edges[lg_index]; + edge_list_indices.push((edges.start.index(), edges.end.index())); + } + HybridIndex::DarkGreen(prev_index) => { + nodes.push(previous.index_to_node(prev_index)); + + let edges_iter = previous + .edge_targets_from(prev_index) + .iter() + .map(|&dst| prev_index_to_index[dst].unwrap().index()); + + let start = edge_list_data.len(); + edge_list_data.extend(edges_iter); + let end = edge_list_data.len(); + edge_list_indices.push((start, end)); + } } } - DepGraphQuery::new(&nodes[..], &edges[..]) + debug_assert_eq!(nodes.len(), node_count); + debug_assert_eq!(edge_list_indices.len(), node_count); + debug_assert_eq!(edge_list_data.len(), edge_count); + + DepGraphQuery::new(&nodes[..], &edge_list_indices[..], &edge_list_data[..]) } pub fn assert_ignored(&self) { @@ -201,7 +246,6 @@ impl<K: DepKind> DepGraph<K> { key, cx, arg, - false, task, |_key| { Some(TaskDeps { @@ -212,7 +256,6 @@ impl<K: DepKind> DepGraph<K> { phantom_data: PhantomData, }) }, - |data, key, fingerprint, task| data.complete_task(key, task.unwrap(), fingerprint), hash_result, ) } @@ -222,66 +265,69 @@ impl<K: DepKind> DepGraph<K> { key: DepNode<K>, cx: Ctxt, arg: A, - no_tcx: bool, task: fn(Ctxt, A) -> R, create_task: fn(DepNode<K>) -> Option<TaskDeps<K>>, - finish_task_and_alloc_depnode: fn( - &CurrentDepGraph<K>, - DepNode<K>, - Fingerprint, - Option<TaskDeps<K>>, - ) -> DepNodeIndex, hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>, ) -> (R, DepNodeIndex) { if let Some(ref data) = self.data { let task_deps = create_task(key).map(Lock::new); + let result = K::with_deps(task_deps.as_ref(), || task(cx, arg)); + let edges = task_deps.map_or_else(|| smallvec![], |lock| lock.into_inner().reads); - // In incremental mode, hash the result of the task. We don't - // do anything with the hash yet, but we are computing it - // anyway so that - // - we make sure that the infrastructure works and - // - we can get an idea of the runtime cost. let mut hcx = cx.create_stable_hashing_context(); - - let result = if no_tcx { - task(cx, arg) - } else { - K::with_deps(task_deps.as_ref(), || task(cx, arg)) - }; - let current_fingerprint = hash_result(&mut hcx, &result); - let dep_node_index = finish_task_and_alloc_depnode( - &data.current, - key, - current_fingerprint.unwrap_or(Fingerprint::ZERO), - task_deps.map(|lock| lock.into_inner()), - ); - let print_status = cfg!(debug_assertions) && cx.debug_dep_tasks(); - // Determine the color of the new DepNode. - if let Some(prev_index) = data.previous.node_to_index_opt(&key) { - let prev_fingerprint = data.previous.fingerprint_by_index(prev_index); - - let color = if let Some(current_fingerprint) = current_fingerprint { - if current_fingerprint == prev_fingerprint { + // Intern the new `DepNode`. 
+ let dep_node_index = if let Some(prev_index) = data.previous.node_to_index_opt(&key) {
+ // Determine the color and index of the new `DepNode`.
+ let (color, dep_node_index) = if let Some(current_fingerprint) = current_fingerprint
+ {
+ if current_fingerprint == data.previous.fingerprint_by_index(prev_index) {
 if print_status {
 eprintln!("[task::green] {:?}", key);
 }
- DepNodeColor::Green(dep_node_index)
+
+ // This is a light green node: it existed in the previous compilation,
+ // its query was re-executed, and it has the same result as before.
+ let dep_node_index =
+ data.current.intern_light_green_node(&data.previous, prev_index, edges);
+
+ (DepNodeColor::Green(dep_node_index), dep_node_index)
 } else {
 if print_status {
 eprintln!("[task::red] {:?}", key);
 }
- DepNodeColor::Red
+
+ // This is a red node: it existed in the previous compilation, its query
+ // was re-executed, but it has a different result from before.
+ let dep_node_index = data.current.intern_red_node(
+ &data.previous,
+ prev_index,
+ edges,
+ current_fingerprint,
+ );
+
+ (DepNodeColor::Red, dep_node_index)
 }
 } else {
 if print_status {
 eprintln!("[task::unknown] {:?}", key);
 }
- // Mark the node as Red if we can't hash the result
- DepNodeColor::Red
+
+ // This is a red node, effectively: it existed in the previous compilation
+ // session, its query was re-executed, but it doesn't compute a result hash
+ // (i.e. it represents a `no_hash` query), so we have no way of determining
+ // whether or not the result was the same as before.
+ let dep_node_index = data.current.intern_red_node(
+ &data.previous,
+ prev_index,
+ edges,
+ Fingerprint::ZERO,
+ );
+
+ (DepNodeColor::Red, dep_node_index)
 };

 debug_assert!(
@@ -292,12 +338,27 @@ impl<K: DepKind> DepGraph<K> {
 );

 data.colors.insert(prev_index, color);
- } else if print_status {
- eprintln!("[task::new] {:?}", key);
- }
+ dep_node_index
+ } else {
+ if print_status {
+ eprintln!("[task::new] {:?}", key);
+ }
+
+ // This is a new node: it didn't exist in the previous compilation session.
+ data.current.intern_new_node(
+ &data.previous,
+ key,
+ edges,
+ current_fingerprint.unwrap_or(Fingerprint::ZERO),
+ )
+ };

 (result, dep_node_index)
 } else {
+ // Incremental compilation is turned off. We just execute the task
+ // without tracking. We still provide a dep-node index that uniquely
+ // identifies the task so that we have a cheap way of referring to
+ // the query for self-profiling.
 (task(cx, arg), self.next_virtual_depnode_index())
 }
 }
@@ -308,13 +369,36 @@ impl<K: DepKind> DepGraph<K> {
 where
 OP: FnOnce() -> R,
 {
+ debug_assert!(!dep_kind.is_eval_always());
+
 if let Some(ref data) = self.data {
 let task_deps = Lock::new(TaskDeps::default());
- let result = K::with_deps(Some(&task_deps), op);
 let task_deps = task_deps.into_inner();
- let dep_node_index = data.current.complete_anon_task(dep_kind, task_deps);
+ // The dep node indices are hashed here instead of hashing the dep nodes of the
+ // dependencies. These indices may refer to different nodes per session, but this isn't
+ // a problem here because we ensure that the final dep node hash is per session only by
+ // combining it with the per session random number `anon_id_seed`. This hash only needs
+ // to map the dependencies to a single value on a per session basis.
+ let mut hasher = StableHasher::new(); + task_deps.reads.hash(&mut hasher); + + let target_dep_node = DepNode { + kind: dep_kind, + // Fingerprint::combine() is faster than sending Fingerprint + // through the StableHasher (at least as long as StableHasher + // is so slow). + hash: data.current.anon_id_seed.combine(hasher.finish()).into(), + }; + + let dep_node_index = data.current.intern_new_node( + &data.previous, + target_dep_node, + task_deps.reads, + Fingerprint::ZERO, + ); + (result, dep_node_index) } else { (op(), self.next_virtual_depnode_index()) @@ -331,69 +415,106 @@ impl<K: DepKind> DepGraph<K> { task: fn(Ctxt, A) -> R, hash_result: impl FnOnce(&mut Ctxt::StableHashingContext, &R) -> Option<Fingerprint>, ) -> (R, DepNodeIndex) { - self.with_task_impl( - key, - cx, - arg, - false, - task, - |_| None, - |data, key, fingerprint, _| data.alloc_node(key, smallvec![], fingerprint), - hash_result, - ) + self.with_task_impl(key, cx, arg, task, |_| None, hash_result) } #[inline] - pub fn read(&self, v: DepNode<K>) { + pub fn read_index(&self, dep_node_index: DepNodeIndex) { if let Some(ref data) = self.data { - let map = data.current.node_to_node_index.get_shard_by_value(&v).lock(); - if let Some(dep_node_index) = map.get(&v).copied() { - std::mem::drop(map); - data.read_index(dep_node_index); - } else { - panic!("DepKind {:?} should be pre-allocated but isn't.", v.kind) - } + K::read_deps(|task_deps| { + if let Some(task_deps) = task_deps { + let mut task_deps = task_deps.lock(); + let task_deps = &mut *task_deps; + if cfg!(debug_assertions) { + data.current.total_read_count.fetch_add(1, Relaxed); + } + + // As long as we only have a low number of reads we can avoid doing a hash + // insert and potentially allocating/reallocating the hashmap + let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP { + task_deps.reads.iter().all(|other| *other != dep_node_index) + } else { + task_deps.read_set.insert(dep_node_index) + }; + if new_read { + task_deps.reads.push(dep_node_index); + if task_deps.reads.len() == TASK_DEPS_READS_CAP { + // Fill `read_set` with what we have so far so we can use the hashset + // next time + task_deps.read_set.extend(task_deps.reads.iter().copied()); + } + + #[cfg(debug_assertions)] + { + if let Some(target) = task_deps.node { + if let Some(ref forbidden_edge) = data.current.forbidden_edge { + let src = self.dep_node_of(dep_node_index); + if forbidden_edge.test(&src, &target) { + panic!("forbidden edge {:?} -> {:?} created", src, target) + } + } + } + } + } else if cfg!(debug_assertions) { + data.current.total_duplicate_read_count.fetch_add(1, Relaxed); + } + } + }) } } #[inline] - pub fn read_index(&self, dep_node_index: DepNodeIndex) { - if let Some(ref data) = self.data { - data.read_index(dep_node_index); - } + pub fn dep_node_index_of(&self, dep_node: &DepNode<K>) -> DepNodeIndex { + self.dep_node_index_of_opt(dep_node).unwrap() } #[inline] - pub fn dep_node_index_of(&self, dep_node: &DepNode<K>) -> DepNodeIndex { - self.data - .as_ref() - .unwrap() - .current - .node_to_node_index - .get_shard_by_value(dep_node) - .lock() - .get(dep_node) - .cloned() - .unwrap() + pub fn dep_node_index_of_opt(&self, dep_node: &DepNode<K>) -> Option<DepNodeIndex> { + let data = self.data.as_ref().unwrap(); + let current = &data.current; + + if let Some(prev_index) = data.previous.node_to_index_opt(dep_node) { + current.prev_index_to_index.lock()[prev_index] + } else { + current.new_node_to_index.get_shard_by_value(dep_node).lock().get(dep_node).copied() + } } 
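[Editor's note: the two-step lookup in `dep_node_index_of_opt` is the key to never storing a `DepNode` key twice. A condensed model with simplified stand-in types (`u64` for `DepNode<K>`, `u32` for `DepNodeIndex`; the real structures are `PreviousDepGraph`'s node map, the dense `prev_index_to_index` vector, and the sharded `new_node_to_index`):]

```
use std::collections::HashMap;

fn index_of_opt(
    prev_node_to_index: &HashMap<u64, usize>, // DepNode -> SerializedDepNodeIndex
    prev_index_to_index: &[Option<u32>],      // SerializedDepNodeIndex -> DepNodeIndex
    new_node_to_index: &HashMap<u64, u32>,    // only nodes absent from the previous graph
    dep_node: u64,
) -> Option<u32> {
    match prev_node_to_index.get(&dep_node) {
        // Known to the previous session: one cheap vector access resolves it.
        Some(&prev_index) => prev_index_to_index[prev_index],
        // Genuinely new: fall back to the (sharded, in the real code) hash map.
        None => new_node_to_index.get(&dep_node).copied(),
    }
}
```

Each `DepNode` key is thus held at most once across the two maps, which matters because these graph structures are among the largest in the compiler.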
#[inline] pub fn dep_node_exists(&self, dep_node: &DepNode<K>) -> bool { - if let Some(ref data) = self.data { - data.current - .node_to_node_index - .get_shard_by_value(&dep_node) - .lock() - .contains_key(dep_node) - } else { - false + self.data.is_some() && self.dep_node_index_of_opt(dep_node).is_some() + } + + #[inline] + pub fn dep_node_of(&self, dep_node_index: DepNodeIndex) -> DepNode<K> { + let data = self.data.as_ref().unwrap(); + let previous = &data.previous; + let data = data.current.data.lock(); + + match data.hybrid_indices[dep_node_index].into() { + HybridIndex::New(new_index) => data.new.nodes[new_index], + HybridIndex::Red(red_index) => previous.index_to_node(data.red.node_indices[red_index]), + HybridIndex::LightGreen(light_green_index) => { + previous.index_to_node(data.light_green.node_indices[light_green_index]) + } + HybridIndex::DarkGreen(prev_index) => previous.index_to_node(prev_index), } } #[inline] pub fn fingerprint_of(&self, dep_node_index: DepNodeIndex) -> Fingerprint { - let data = self.data.as_ref().expect("dep graph enabled").current.data.lock(); - data[dep_node_index].fingerprint + let data = self.data.as_ref().unwrap(); + let previous = &data.previous; + let data = data.current.data.lock(); + + match data.hybrid_indices[dep_node_index].into() { + HybridIndex::New(new_index) => data.new.fingerprints[new_index], + HybridIndex::Red(red_index) => data.red.fingerprints[red_index], + HybridIndex::LightGreen(light_green_index) => { + previous.fingerprint_by_index(data.light_green.node_indices[light_green_index]) + } + HybridIndex::DarkGreen(prev_index) => previous.fingerprint_by_index(prev_index), + } } pub fn prev_fingerprint_of(&self, dep_node: &DepNode<K>) -> Option<Fingerprint> { @@ -443,30 +564,95 @@ impl<K: DepKind> DepGraph<K> { } } - pub fn serialize(&self) -> SerializedDepGraph<K> { - let data = self.data.as_ref().unwrap().current.data.lock(); + fn edge_count(&self, node_data: &LockGuard<'_, DepNodeData<K>>) -> usize { + let data = self.data.as_ref().unwrap(); + let previous = &data.previous; - let fingerprints: IndexVec<SerializedDepNodeIndex, _> = - data.iter().map(|d| d.fingerprint).collect(); - let nodes: IndexVec<SerializedDepNodeIndex, _> = data.iter().map(|d| d.node).collect(); + let mut edge_count = node_data.unshared_edges.len(); - let total_edge_count: usize = data.iter().map(|d| d.edges.len()).sum(); + for &hybrid_index in node_data.hybrid_indices.iter() { + if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() { + edge_count += previous.edge_targets_from(prev_index).len() + } + } - let mut edge_list_indices = IndexVec::with_capacity(nodes.len()); - let mut edge_list_data = Vec::with_capacity(total_edge_count); + edge_count + } - for (current_dep_node_index, edges) in data.iter_enumerated().map(|(i, d)| (i, &d.edges)) { - let start = edge_list_data.len() as u32; - // This should really just be a memcpy :/ - edge_list_data.extend(edges.iter().map(|i| SerializedDepNodeIndex::new(i.index()))); - let end = edge_list_data.len() as u32; + pub fn serialize(&self) -> SerializedDepGraph<K> { + type SDNI = SerializedDepNodeIndex; - debug_assert_eq!(current_dep_node_index.index(), edge_list_indices.len()); - edge_list_indices.push((start, end)); + let data = self.data.as_ref().unwrap(); + let previous = &data.previous; + + // Note locking order: `prev_index_to_index`, then `data`. 
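+ // (The interning methods take `prev_index_to_index` or `new_node_to_index`
+ // before `data` as well; sticking to a single acquisition order is what
+ // keeps these two-lock operations deadlock-free.)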
+ let prev_index_to_index = data.current.prev_index_to_index.lock(); + let data = data.current.data.lock(); + let node_count = data.hybrid_indices.len(); + let edge_count = self.edge_count(&data); + + let mut nodes = IndexVec::with_capacity(node_count); + let mut fingerprints = IndexVec::with_capacity(node_count); + let mut edge_list_indices = IndexVec::with_capacity(node_count); + let mut edge_list_data = Vec::with_capacity(edge_count); + + // `rustc_middle::ty::query::OnDiskCache` expects nodes to be in + // `DepNodeIndex` order. The edges in `edge_list_data`, on the other + // hand, don't need to be in a particular order, as long as each node + // can reference its edges as a contiguous range within it. This is why + // we're able to copy `unshared_edges` directly into `edge_list_data`. + // It meets the above requirements, and each non-dark-green node already + // knows the range of edges to reference within it, which they'll push + // onto `edge_list_indices`. Dark green nodes, however, don't have their + // edges in `unshared_edges`, so need to add them to `edge_list_data`. + + edge_list_data.extend(data.unshared_edges.iter().map(|i| SDNI::new(i.index()))); + + for &hybrid_index in data.hybrid_indices.iter() { + match hybrid_index.into() { + HybridIndex::New(i) => { + let new = &data.new; + nodes.push(new.nodes[i]); + fingerprints.push(new.fingerprints[i]); + let edges = &new.edges[i]; + edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32())); + } + HybridIndex::Red(i) => { + let red = &data.red; + nodes.push(previous.index_to_node(red.node_indices[i])); + fingerprints.push(red.fingerprints[i]); + let edges = &red.edges[i]; + edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32())); + } + HybridIndex::LightGreen(i) => { + let lg = &data.light_green; + nodes.push(previous.index_to_node(lg.node_indices[i])); + fingerprints.push(previous.fingerprint_by_index(lg.node_indices[i])); + let edges = &lg.edges[i]; + edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32())); + } + HybridIndex::DarkGreen(prev_index) => { + nodes.push(previous.index_to_node(prev_index)); + fingerprints.push(previous.fingerprint_by_index(prev_index)); + + let edges_iter = previous + .edge_targets_from(prev_index) + .iter() + .map(|&dst| prev_index_to_index[dst].as_ref().unwrap()); + + let start = edge_list_data.len() as u32; + edge_list_data.extend(edges_iter.map(|i| SDNI::new(i.index()))); + let end = edge_list_data.len() as u32; + edge_list_indices.push((start, end)); + } + } } + debug_assert_eq!(nodes.len(), node_count); + debug_assert_eq!(fingerprints.len(), node_count); + debug_assert_eq!(edge_list_indices.len(), node_count); + debug_assert_eq!(edge_list_data.len(), edge_count); debug_assert!(edge_list_data.len() <= u32::MAX as usize); - debug_assert_eq!(edge_list_data.len(), total_edge_count); SerializedDepGraph { nodes, fingerprints, edge_list_indices, edge_list_data } } @@ -540,31 +726,22 @@ impl<K: DepKind> DepGraph<K> { #[cfg(not(parallel_compiler))] { - debug_assert!( - !data - .current - .node_to_node_index - .get_shard_by_value(dep_node) - .lock() - .contains_key(dep_node) - ); + debug_assert!(!self.dep_node_exists(dep_node)); debug_assert!(data.colors.get(prev_dep_node_index).is_none()); } // We never try to mark eval_always nodes as green debug_assert!(!dep_node.kind.is_eval_always()); - data.previous.debug_assert_eq(prev_dep_node_index, *dep_node); + debug_assert_eq!(data.previous.index_to_node(prev_dep_node_index), *dep_node); let prev_deps = 
data.previous.edge_targets_from(prev_dep_node_index); - let mut current_deps = SmallVec::new(); - for &dep_dep_node_index in prev_deps { let dep_dep_node_color = data.colors.get(dep_dep_node_index); match dep_dep_node_color { - Some(DepNodeColor::Green(node_index)) => { + Some(DepNodeColor::Green(_)) => { // This dependency has been marked as green before, we are // still fine and can continue with checking the other // dependencies. @@ -572,9 +749,8 @@ impl<K: DepKind> DepGraph<K> { "try_mark_previous_green({:?}) --- found dependency {:?} to \ be immediately green", dep_node, - data.previous.debug_dep_node(dep_dep_node_index), + data.previous.index_to_node(dep_dep_node_index) ); - current_deps.push(node_index); } Some(DepNodeColor::Red) => { // We found a dependency the value of which has changed @@ -585,12 +761,12 @@ impl<K: DepKind> DepGraph<K> { "try_mark_previous_green({:?}) - END - dependency {:?} was \ immediately red", dep_node, - data.previous.debug_dep_node(dep_dep_node_index) + data.previous.index_to_node(dep_dep_node_index) ); return None; } None => { - let dep_dep_node = &data.previous.index_to_node(dep_dep_node_index, tcx); + let dep_dep_node = &data.previous.index_to_node(dep_dep_node_index); // We don't know the state of this dependency. If it isn't // an eval_always node, let's try to mark it green recursively. @@ -607,13 +783,12 @@ impl<K: DepKind> DepGraph<K> { dep_dep_node_index, dep_dep_node, ); - if let Some(node_index) = node_index { + if node_index.is_some() { debug!( "try_mark_previous_green({:?}) --- managed to MARK \ dependency {:?} as green", dep_node, dep_dep_node ); - current_deps.push(node_index); continue; } } @@ -628,13 +803,12 @@ impl<K: DepKind> DepGraph<K> { let dep_dep_node_color = data.colors.get(dep_dep_node_index); match dep_dep_node_color { - Some(DepNodeColor::Green(node_index)) => { + Some(DepNodeColor::Green(_)) => { debug!( "try_mark_previous_green({:?}) --- managed to \ FORCE dependency {:?} to green", dep_node, dep_dep_node ); - current_deps.push(node_index); } Some(DepNodeColor::Red) => { debug!( @@ -690,13 +864,9 @@ impl<K: DepKind> DepGraph<K> { // There may be multiple threads trying to mark the same dep node green concurrently let dep_node_index = { - // Copy the fingerprint from the previous graph, - // so we don't have to recompute it - let fingerprint = data.previous.fingerprint_by_index(prev_dep_node_index); - // We allocating an entry for the node in the current dependency graph and // adding all the appropriate edges imported from the previous graph - data.current.intern_node(*dep_node, current_deps, fingerprint) + data.current.intern_dark_green_node(&data.previous, prev_dep_node_index) }; // ... emitting any stored diagnostic ... @@ -801,7 +971,7 @@ impl<K: DepKind> DepGraph<K> { for prev_index in data.colors.values.indices() { match data.colors.get(prev_index) { Some(DepNodeColor::Green(_)) => { - let dep_node = data.previous.index_to_node(prev_index, tcx); + let dep_node = data.previous.index_to_node(prev_index); tcx.try_load_from_on_disk_cache(&dep_node); } None | Some(DepNodeColor::Red) => { @@ -813,6 +983,20 @@ impl<K: DepKind> DepGraph<K> { } } + // Register reused dep nodes (i.e. nodes we've marked red or green) with the context. 
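+ // As noted in `dep_node.rs`, `OnDiskCache::serialize` is responsible for
+ // calling this when the on-disk cache is written out.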
+ pub fn register_reused_dep_nodes<Ctxt: DepContext<DepKind = K>>(&self, tcx: Ctxt) {
+ let data = self.data.as_ref().unwrap();
+ for prev_index in data.colors.values.indices() {
+ match data.colors.get(prev_index) {
+ Some(DepNodeColor::Red) | Some(DepNodeColor::Green(_)) => {
+ let dep_node = data.previous.index_to_node(prev_index);
+ tcx.register_reused_dep_node(&dep_node);
+ }
+ None => {}
+ }
+ }
+ }
+
 fn next_virtual_depnode_index(&self) -> DepNodeIndex {
 let index = self.virtual_dep_node_index.fetch_add(1, Relaxed);
 DepNodeIndex::from_u32(index)
@@ -857,31 +1041,234 @@ pub struct WorkProduct {
 pub saved_file: Option<String>,
 }

-#[derive(Clone)]
+// The maximum value of the following index types leaves the upper two bits unused
+// so that we can store multiple index types in `CompressedHybridIndex`, and use
+// those bits to encode which index type it contains.
+
+// Index type for `NewDepNodeData`.
+rustc_index::newtype_index! {
+ struct NewDepNodeIndex {
+ MAX = 0x7FFF_FFFF
+ }
+}
+
+// Index type for `RedDepNodeData`.
+rustc_index::newtype_index! {
+ struct RedDepNodeIndex {
+ MAX = 0x7FFF_FFFF
+ }
+}
+
+// Index type for `LightGreenDepNodeData`.
+rustc_index::newtype_index! {
+ struct LightGreenDepNodeIndex {
+ MAX = 0x7FFF_FFFF
+ }
+}
+
+/// Compressed representation of `HybridIndex` enum. Bits unused by the
+/// contained index types are used to encode which index type it contains.
+#[derive(Copy, Clone)]
+struct CompressedHybridIndex(u32);
+
+impl CompressedHybridIndex {
+ const NEW_TAG: u32 = 0b0000_0000_0000_0000_0000_0000_0000_0000;
+ const RED_TAG: u32 = 0b0100_0000_0000_0000_0000_0000_0000_0000;
+ const LIGHT_GREEN_TAG: u32 = 0b1000_0000_0000_0000_0000_0000_0000_0000;
+ const DARK_GREEN_TAG: u32 = 0b1100_0000_0000_0000_0000_0000_0000_0000;
+
+ const TAG_MASK: u32 = 0b1100_0000_0000_0000_0000_0000_0000_0000;
+ const INDEX_MASK: u32 = !Self::TAG_MASK;
+}
+
+impl From<NewDepNodeIndex> for CompressedHybridIndex {
+ #[inline]
+ fn from(index: NewDepNodeIndex) -> Self {
+ CompressedHybridIndex(Self::NEW_TAG | index.as_u32())
+ }
+}
+
+impl From<RedDepNodeIndex> for CompressedHybridIndex {
+ #[inline]
+ fn from(index: RedDepNodeIndex) -> Self {
+ CompressedHybridIndex(Self::RED_TAG | index.as_u32())
+ }
+}
+
+impl From<LightGreenDepNodeIndex> for CompressedHybridIndex {
+ #[inline]
+ fn from(index: LightGreenDepNodeIndex) -> Self {
+ CompressedHybridIndex(Self::LIGHT_GREEN_TAG | index.as_u32())
+ }
+}
+
+impl From<SerializedDepNodeIndex> for CompressedHybridIndex {
+ #[inline]
+ fn from(index: SerializedDepNodeIndex) -> Self {
+ CompressedHybridIndex(Self::DARK_GREEN_TAG | index.as_u32())
+ }
+}
+
+/// Contains an index into one of several node data collections. Elsewhere, we
+/// store `CompressedHybridIndex` instead of this to save space, but convert to
+/// this type during processing to take advantage of the enum match ergonomics.
+enum HybridIndex {
+ New(NewDepNodeIndex),
+ Red(RedDepNodeIndex),
+ LightGreen(LightGreenDepNodeIndex),
+ DarkGreen(SerializedDepNodeIndex),
+}
+
+impl From<CompressedHybridIndex> for HybridIndex {
+ #[inline]
+ fn from(hybrid_index: CompressedHybridIndex) -> Self {
+ let index = hybrid_index.0 & CompressedHybridIndex::INDEX_MASK;
+
+ match hybrid_index.0 & CompressedHybridIndex::TAG_MASK {
+ CompressedHybridIndex::NEW_TAG => HybridIndex::New(NewDepNodeIndex::from_u32(index)),
+ CompressedHybridIndex::RED_TAG => HybridIndex::Red(RedDepNodeIndex::from_u32(index)),
+ CompressedHybridIndex::LIGHT_GREEN_TAG => {
+ HybridIndex::LightGreen(LightGreenDepNodeIndex::from_u32(index))
+ }
+ CompressedHybridIndex::DARK_GREEN_TAG => {
+ HybridIndex::DarkGreen(SerializedDepNodeIndex::from_u32(index))
+ }
+ _ => unreachable!(),
+ }
+ }
+}
+
+// Index type for `DepNodeData`'s edges.
+rustc_index::newtype_index! {
+ struct EdgeIndex { .. }
+}
+
+/// Data for nodes in the current graph, divided into different collections
+/// based on their presence in the previous graph, and if present, their color.
+/// We divide nodes this way because different types of nodes are able to share
+/// more or less data with the previous graph.
+///
+/// To enable more sharing, we distinguish between two kinds of green nodes.
+/// Light green nodes are nodes in the previous graph that have been marked
+/// green because we re-executed their queries and the results were the same as
+/// in the previous session. Dark green nodes are nodes in the previous graph
+/// that have been marked green because we were able to mark all of their
+/// dependencies green.
+///
+/// Both light and dark green nodes can share the dep node and fingerprint with
+/// the previous graph, but for light green nodes, we can't tell whether the
+/// edges can be shared without comparing them against the previous edges, so we
+/// store them directly (an approach in which we compare edges with the previous
+/// edges to see if they can be shared was evaluated, but was not found to be
+/// very profitable).
+///
+/// For dark green nodes, we can share everything with the previous graph, which
+/// is why the `HybridIndex::DarkGreen` enum variant contains the index of the
+/// node in the previous graph, and why we don't have a separate collection for
+/// dark green node data--the collection is the `PreviousDepGraph` itself.
+///
+/// (Note that for dark green nodes, the edges in the previous graph
+/// (`SerializedDepNodeIndex`s) must be converted to edges in the current graph
+/// (`DepNodeIndex`s). `CurrentDepGraph` contains `prev_index_to_index`, which
+/// can perform this conversion. It should always be possible, as by definition,
+/// a dark green node is one whose dependencies from the previous session have
+/// all been marked green--which means `prev_index_to_index` contains them.)
+///
+/// Node data is stored in parallel vectors to eliminate the padding between
+/// elements that would be needed to satisfy alignment requirements of the
+/// structure that would contain all of a node's data. We could group tightly
+/// packed subsets of node data together and use fewer vectors, but for
+/// consistency's sake, we use separate vectors for each piece of data.
 struct DepNodeData<K> {
- node: DepNode<K>,
- edges: EdgesVec,
- fingerprint: Fingerprint,
+ /// Data for nodes not in previous graph.
+ new: NewDepNodeData<K>,
+
+ /// Data for nodes in previous graph that have been marked red.
+ red: RedDepNodeData,
+
+ /// Data for nodes in previous graph that have been marked light green.
+ light_green: LightGreenDepNodeData,
+
+ // Edges for all nodes other than dark-green ones. Edges for each node
+ // occupy a contiguous region of this collection, which a node can reference
+ // using two indices. Storing edges this way rather than using an `EdgesVec`
+ // for each node reduces memory consumption by a not insignificant amount
+ // when compiling large crates. The downside is that we have to copy into
+ // this collection the edges from the `EdgesVec`s that are built up during
+ // query execution. But this is mostly balanced out by the more efficient
+ // implementation of `DepGraph::serialize` enabled by this representation.
+ unshared_edges: IndexVec<EdgeIndex, DepNodeIndex>,
+
+ /// Mapping from `DepNodeIndex` to an index into a collection above.
+ /// Indicates which of the above collections contains a node's data.
+ ///
+ /// This collection is wasteful in time and space during incr-full builds,
+ /// because for those, all nodes are new. However, the waste is relatively
+ /// small, and the maintenance cost of avoiding using this for incr-full
+ /// builds is somewhat high and prone to bugginess. It does not seem worth
+ /// it at the time of this writing, but we may want to revisit the idea.
+ hybrid_indices: IndexVec<DepNodeIndex, CompressedHybridIndex>,
+}
+
+/// Data for nodes not in previous graph. Since we cannot share any data with
+/// the previous graph, we must store all of such a node's data here.
+struct NewDepNodeData<K> {
+ nodes: IndexVec<NewDepNodeIndex, DepNode<K>>,
+ edges: IndexVec<NewDepNodeIndex, Range<EdgeIndex>>,
+ fingerprints: IndexVec<NewDepNodeIndex, Fingerprint>,
 }

-/// `CurrentDepGraph` stores the dependency graph for the current session.
-/// It will be populated as we run queries or tasks.
+/// Data for nodes in previous graph that have been marked red. We can share the
+/// dep node with the previous graph, but the edges may be different, and the
+/// fingerprint is known to be different, so we store the latter two directly.
+struct RedDepNodeData {
+ node_indices: IndexVec<RedDepNodeIndex, SerializedDepNodeIndex>,
+ edges: IndexVec<RedDepNodeIndex, Range<EdgeIndex>>,
+ fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
+}
+
+/// Data for nodes in previous graph that have been marked green because we
+/// re-executed their queries and the results were the same as in the previous
+/// session. We can share the dep node and the fingerprint with the previous
+/// graph, but the edges may be different, so we store them directly.
+struct LightGreenDepNodeData {
+ node_indices: IndexVec<LightGreenDepNodeIndex, SerializedDepNodeIndex>,
+ edges: IndexVec<LightGreenDepNodeIndex, Range<EdgeIndex>>,
+}
+
+/// `CurrentDepGraph` stores the dependency graph for the current session. It
+/// will be populated as we run queries or tasks. We never remove nodes from the
+/// graph: they are only added.
 ///
-/// The nodes in it are identified by an index (`DepNodeIndex`).
-/// The data for each node is stored in its `DepNodeData`, found in the `data` field.
+/// The nodes in it are identified by a `DepNodeIndex`. Internally, this maps to
+/// a `HybridIndex`, which identifies which collection in the `data` field
+/// contains a node's data. Which collection is used for a node depends on
+/// whether the node was present in the `PreviousDepGraph`, and if so, the color
+/// of the node.
Each type of node can share more or less data with the previous +/// graph. When possible, we can store just the index of the node in the +/// previous graph, rather than duplicating its data in our own collections. +/// This is important, because these graph structures are some of the largest in +/// the compiler. /// -/// We never remove nodes from the graph: they are only added. +/// For the same reason, we also avoid storing `DepNode`s more than once as map +/// keys. The `new_node_to_index` map only contains nodes not in the previous +/// graph, and we map nodes in the previous graph to indices via a two-step +/// mapping. `PreviousDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`, +/// and the `prev_index_to_index` vector (which is more compact and faster than +/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`. /// -/// This struct uses two locks internally. The `data` and `node_to_node_index` fields are -/// locked separately. Operations that take a `DepNodeIndex` typically just access -/// the data field. +/// This struct uses three locks internally. The `data`, `new_node_to_index`, +/// and `prev_index_to_index` fields are locked separately. Operations that take +/// a `DepNodeIndex` typically just access the `data` field. /// -/// The only operation that must manipulate both locks is adding new nodes, in which case -/// we first acquire the `node_to_node_index` lock and then, once a new node is to be inserted, -/// acquire the lock on `data.` +/// We only need to manipulate at most two locks simultaneously: +/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When +/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index` +/// first, and `data` second. pub(super) struct CurrentDepGraph<K> { - data: Lock<IndexVec<DepNodeIndex, DepNodeData<K>>>, - node_to_node_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>, + data: Lock<DepNodeData<K>>, + new_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>, + prev_index_to_index: Lock<IndexVec<SerializedDepNodeIndex, Option<DepNodeIndex>>>, /// Used to trap when a specific edge is added to the graph. /// This is used for debug purposes and is only active with `debug_assertions`. @@ -930,18 +1317,63 @@ impl<K: DepKind> CurrentDepGraph<K> { // Pre-allocate the dep node structures. We over-allocate a little so // that we hopefully don't have to re-allocate during this compilation - // session. The over-allocation is 2% plus a small constant to account - // for the fact that in very small crates 2% might not be enough. - let new_node_count_estimate = (prev_graph_node_count * 102) / 100 + 200; + // session. The over-allocation for new nodes is 2% plus a small + // constant to account for the fact that in very small crates 2% might + // not be enough. The allocation for red and green node data doesn't + // include a constant, as we don't want to allocate anything for these + // structures during full incremental builds, where they aren't used. + // + // These estimates are based on the distribution of node and edge counts + // seen in rustc-perf benchmarks, adjusted somewhat to account for the + // fact that these benchmarks aren't perfectly representative. + // + // FIXME Use a collection type that doesn't copy node and edge data and + // grow multiplicatively on reallocation. 
Without such a collection or + // solution having the same effect, there is a performance hazard here + // in both time and space, as growing these collections means copying a + // large amount of data and doubling already large buffer capacities. A + // solution for this will also mean that it's less important to get + // these estimates right. + let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200; + let red_node_count_estimate = (prev_graph_node_count * 3) / 100; + let light_green_node_count_estimate = (prev_graph_node_count * 25) / 100; + let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate; + + let average_edges_per_node_estimate = 6; + let unshared_edge_count_estimate = average_edges_per_node_estimate + * (new_node_count_estimate + red_node_count_estimate + light_green_node_count_estimate); + + // We store a large collection of these in `prev_index_to_index` during + // non-full incremental builds, and want to ensure that the element size + // doesn't inadvertently increase. + static_assert_size!(Option<DepNodeIndex>, 4); CurrentDepGraph { - data: Lock::new(IndexVec::with_capacity(new_node_count_estimate)), - node_to_node_index: Sharded::new(|| { + data: Lock::new(DepNodeData { + new: NewDepNodeData { + nodes: IndexVec::with_capacity(new_node_count_estimate), + edges: IndexVec::with_capacity(new_node_count_estimate), + fingerprints: IndexVec::with_capacity(new_node_count_estimate), + }, + red: RedDepNodeData { + node_indices: IndexVec::with_capacity(red_node_count_estimate), + edges: IndexVec::with_capacity(red_node_count_estimate), + fingerprints: IndexVec::with_capacity(red_node_count_estimate), + }, + light_green: LightGreenDepNodeData { + node_indices: IndexVec::with_capacity(light_green_node_count_estimate), + edges: IndexVec::with_capacity(light_green_node_count_estimate), + }, + unshared_edges: IndexVec::with_capacity(unshared_edge_count_estimate), + hybrid_indices: IndexVec::with_capacity(total_node_count_estimate), + }), + new_node_to_index: Sharded::new(|| { FxHashMap::with_capacity_and_hasher( new_node_count_estimate / sharded::SHARDS, Default::default(), ) }), + prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)), anon_id_seed: stable_hasher.finish(), forbidden_edge, total_read_count: AtomicU64::new(0), @@ -949,116 +1381,127 @@ impl<K: DepKind> CurrentDepGraph<K> { } } - fn complete_task( + fn intern_new_node( &self, - node: DepNode<K>, - task_deps: TaskDeps<K>, + prev_graph: &PreviousDepGraph<K>, + dep_node: DepNode<K>, + edges: EdgesVec, fingerprint: Fingerprint, ) -> DepNodeIndex { - self.alloc_node(node, task_deps.reads, fingerprint) - } - - fn complete_anon_task(&self, kind: K, task_deps: TaskDeps<K>) -> DepNodeIndex { - debug_assert!(!kind.is_eval_always()); - - let mut hasher = StableHasher::new(); - - // The dep node indices are hashed here instead of hashing the dep nodes of the - // dependencies. These indices may refer to different nodes per session, but this isn't - // a problem here because we that ensure the final dep node hash is per session only by - // combining it with the per session random number `anon_id_seed`. This hash only need - // to map the dependencies to a single value on a per session basis. - task_deps.reads.hash(&mut hasher); - - let target_dep_node = DepNode { - kind, - - // Fingerprint::combine() is faster than sending Fingerprint - // through the StableHasher (at least as long as StableHasher - // is so slow). 
- hash: self.anon_id_seed.combine(hasher.finish()).into(), - }; + debug_assert!( + prev_graph.node_to_index_opt(&dep_node).is_none(), + "node in previous graph should be interned using one \ + of `intern_red_node`, `intern_light_green_node`, etc." + ); - self.intern_node(target_dep_node, task_deps.reads, Fingerprint::ZERO) + match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let data = &mut *self.data.lock(); + let new_index = data.new.nodes.push(dep_node); + add_edges(&mut data.unshared_edges, &mut data.new.edges, edges); + data.new.fingerprints.push(fingerprint); + let dep_node_index = data.hybrid_indices.push(new_index.into()); + entry.insert(dep_node_index); + dep_node_index + } + } } - fn alloc_node( + fn intern_red_node( &self, - dep_node: DepNode<K>, + prev_graph: &PreviousDepGraph<K>, + prev_index: SerializedDepNodeIndex, edges: EdgesVec, fingerprint: Fingerprint, ) -> DepNodeIndex { - debug_assert!( - !self.node_to_node_index.get_shard_by_value(&dep_node).lock().contains_key(&dep_node) - ); - self.intern_node(dep_node, edges, fingerprint) + self.debug_assert_not_in_new_nodes(prev_graph, prev_index); + + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let data = &mut *self.data.lock(); + let red_index = data.red.node_indices.push(prev_index); + add_edges(&mut data.unshared_edges, &mut data.red.edges, edges); + data.red.fingerprints.push(fingerprint); + let dep_node_index = data.hybrid_indices.push(red_index.into()); + prev_index_to_index[prev_index] = Some(dep_node_index); + dep_node_index + } + } } - fn intern_node( + fn intern_light_green_node( &self, - dep_node: DepNode<K>, + prev_graph: &PreviousDepGraph<K>, + prev_index: SerializedDepNodeIndex, edges: EdgesVec, - fingerprint: Fingerprint, ) -> DepNodeIndex { - match self.node_to_node_index.get_shard_by_value(&dep_node).lock().entry(dep_node) { - Entry::Occupied(entry) => *entry.get(), - Entry::Vacant(entry) => { - let mut data = self.data.lock(); - let dep_node_index = DepNodeIndex::new(data.len()); - data.push(DepNodeData { node: dep_node, edges, fingerprint }); - entry.insert(dep_node_index); + self.debug_assert_not_in_new_nodes(prev_graph, prev_index); + + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let data = &mut *self.data.lock(); + let light_green_index = data.light_green.node_indices.push(prev_index); + add_edges(&mut data.unshared_edges, &mut data.light_green.edges, edges); + let dep_node_index = data.hybrid_indices.push(light_green_index.into()); + prev_index_to_index[prev_index] = Some(dep_node_index); dep_node_index } } } -} -impl<K: DepKind> DepGraphData<K> { - #[inline(never)] - fn read_index(&self, source: DepNodeIndex) { - K::read_deps(|task_deps| { - if let Some(task_deps) = task_deps { - let mut task_deps = task_deps.lock(); - let task_deps = &mut *task_deps; - if cfg!(debug_assertions) { - self.current.total_read_count.fetch_add(1, Relaxed); - } + fn intern_dark_green_node( + &self, + prev_graph: &PreviousDepGraph<K>, + prev_index: SerializedDepNodeIndex, + ) -> DepNodeIndex { + self.debug_assert_not_in_new_nodes(prev_graph, prev_index); - // As long as we only have a low number of reads we can avoid doing a hash - // insert and potentially allocating/reallocating the hashmap 
- let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP { - task_deps.reads.iter().all(|other| *other != source) - } else { - task_deps.read_set.insert(source) - }; - if new_read { - task_deps.reads.push(source); - if task_deps.reads.len() == TASK_DEPS_READS_CAP { - // Fill `read_set` with what we have so far so we can use the hashset next - // time - task_deps.read_set.extend(task_deps.reads.iter().copied()); - } + let mut prev_index_to_index = self.prev_index_to_index.lock(); - #[cfg(debug_assertions)] - { - if let Some(target) = task_deps.node { - let data = self.current.data.lock(); - if let Some(ref forbidden_edge) = self.current.forbidden_edge { - let source = data[source].node; - if forbidden_edge.test(&source, &target) { - panic!("forbidden edge {:?} -> {:?} created", source, target) - } - } - } - } - } else if cfg!(debug_assertions) { - self.current.total_duplicate_read_count.fetch_add(1, Relaxed); - } + match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let mut data = self.data.lock(); + let dep_node_index = data.hybrid_indices.push(prev_index.into()); + prev_index_to_index[prev_index] = Some(dep_node_index); + dep_node_index } - }) + } + } + + #[inline] + fn debug_assert_not_in_new_nodes( + &self, + prev_graph: &PreviousDepGraph<K>, + prev_index: SerializedDepNodeIndex, + ) { + let node = &prev_graph.index_to_node(prev_index); + debug_assert!( + !self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node), + "node from previous graph present in new node collection" + ); } } +#[inline] +fn add_edges<I: Idx>( + edges: &mut IndexVec<EdgeIndex, DepNodeIndex>, + edge_indices: &mut IndexVec<I, Range<EdgeIndex>>, + new_edges: EdgesVec, +) { + let start = edges.next_index(); + edges.extend(new_edges); + let end = edges.next_index(); + edge_indices.push(start..end); +} + /// The capacity of the `reads` field `SmallVec` const TASK_DEPS_READS_CAP: usize = 8; type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>; diff --git a/compiler/rustc_query_system/src/dep_graph/mod.rs b/compiler/rustc_query_system/src/dep_graph/mod.rs index 3b4b62ad6be..da0b5aad6c8 100644 --- a/compiler/rustc_query_system/src/dep_graph/mod.rs +++ b/compiler/rustc_query_system/src/dep_graph/mod.rs @@ -15,7 +15,6 @@ use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::sync::Lock; use rustc_data_structures::thin_vec::ThinVec; use rustc_errors::Diagnostic; -use rustc_span::def_id::DefPathHash; use std::fmt; use std::hash::Hash; @@ -33,7 +32,7 @@ pub trait DepContext: Copy { /// Try to force a dep node to execute and see if it's green. fn try_force_from_dep_node(&self, dep_node: &DepNode<Self::DepKind>) -> bool; - fn register_reused_dep_path_hash(&self, hash: DefPathHash); + fn register_reused_dep_node(&self, dep_node: &DepNode<Self::DepKind>); /// Return whether the current session is tainted by errors. 
fn has_errors_or_delayed_span_bugs(&self) -> bool; diff --git a/compiler/rustc_query_system/src/dep_graph/prev.rs b/compiler/rustc_query_system/src/dep_graph/prev.rs index 9298b652da2..29357ce9449 100644 --- a/compiler/rustc_query_system/src/dep_graph/prev.rs +++ b/compiler/rustc_query_system/src/dep_graph/prev.rs @@ -1,9 +1,7 @@ use super::serialized::{SerializedDepGraph, SerializedDepNodeIndex}; use super::{DepKind, DepNode}; -use crate::dep_graph::DepContext; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxHashMap; -use rustc_span::def_id::DefPathHash; #[derive(Debug, Encodable, Decodable)] pub struct PreviousDepGraph<K: DepKind> { @@ -33,44 +31,7 @@ impl<K: DepKind> PreviousDepGraph<K> { } #[inline] - pub fn index_to_node<CTX: DepContext<DepKind = K>>( - &self, - dep_node_index: SerializedDepNodeIndex, - tcx: CTX, - ) -> DepNode<K> { - let dep_node = self.data.nodes[dep_node_index]; - // We have just loaded a deserialized `DepNode` from the previous - // compilation session into the current one. If this was a foreign `DefId`, - // then we stored additional information in the incr comp cache when we - // initially created its fingerprint (see `DepNodeParams::to_fingerprint`) - // We won't be calling `to_fingerprint` again for this `DepNode` (we no longer - // have the original value), so we need to copy over this additional information - // from the old incremental cache into the new cache that we serialize - // and the end of this compilation session. - if dep_node.kind.can_reconstruct_query_key() { - tcx.register_reused_dep_path_hash(DefPathHash(dep_node.hash.into())); - } - dep_node - } - - /// When debug assertions are enabled, asserts that the dep node at `dep_node_index` is equal to `dep_node`. - /// This method should be preferred over manually calling `index_to_node`. - /// Calls to `index_to_node` may affect global state, so gating a call - /// to `index_to_node` on debug assertions could cause behavior changes when debug assertions - /// are enabled. - #[inline] - pub fn debug_assert_eq(&self, dep_node_index: SerializedDepNodeIndex, dep_node: DepNode<K>) { - debug_assert_eq!(self.data.nodes[dep_node_index], dep_node); - } - - /// Obtains a debug-printable version of the `DepNode`. - /// See `debug_assert_eq` for why this should be preferred over manually - /// calling `dep_node_index` - pub fn debug_dep_node(&self, dep_node_index: SerializedDepNodeIndex) -> impl std::fmt::Debug { - // We're returning the `DepNode` without calling `register_reused_dep_path_hash`, - // but `impl Debug` return type means that it can only be used for debug printing. 
- // So, there's no risk of calls trying to create new dep nodes that have this - // node as a dependency + pub fn index_to_node(&self, dep_node_index: SerializedDepNodeIndex) -> DepNode<K> { self.data.nodes[dep_node_index] } diff --git a/compiler/rustc_query_system/src/dep_graph/query.rs b/compiler/rustc_query_system/src/dep_graph/query.rs index a27b716b95a..cc25d08cb54 100644 --- a/compiler/rustc_query_system/src/dep_graph/query.rs +++ b/compiler/rustc_query_system/src/dep_graph/query.rs @@ -9,17 +9,23 @@ pub struct DepGraphQuery<K> { } impl<K: DepKind> DepGraphQuery<K> { - pub fn new(nodes: &[DepNode<K>], edges: &[(DepNode<K>, DepNode<K>)]) -> DepGraphQuery<K> { - let mut graph = Graph::with_capacity(nodes.len(), edges.len()); + pub fn new( + nodes: &[DepNode<K>], + edge_list_indices: &[(usize, usize)], + edge_list_data: &[usize], + ) -> DepGraphQuery<K> { + let mut graph = Graph::with_capacity(nodes.len(), edge_list_data.len()); let mut indices = FxHashMap::default(); for node in nodes { indices.insert(*node, graph.add_node(*node)); } - for &(ref source, ref target) in edges { - let source = indices[source]; - let target = indices[target]; - graph.add_edge(source, target, ()); + for (source, &(start, end)) in edge_list_indices.iter().enumerate() { + for &target in &edge_list_data[start..end] { + let source = indices[&nodes[source]]; + let target = indices[&nodes[target]]; + graph.add_edge(source, target, ()); + } } DepGraphQuery { graph, indices } diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs index 932c6d2a2f1..28e07406918 100644 --- a/compiler/rustc_query_system/src/dep_graph/serialized.rs +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -4,8 +4,13 @@ use super::{DepKind, DepNode}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_index::vec::IndexVec; +// The maximum value of `SerializedDepNodeIndex` leaves the upper two bits +// unused so that we can store multiple index types in `CompressedHybridIndex`, +// and use those bits to encode which index type it contains. rustc_index::newtype_index! { - pub struct SerializedDepNodeIndex { .. } + pub struct SerializedDepNodeIndex { + MAX = 0x7FFF_FFFF + } } /// Data for use when recompiling the **current crate**. diff --git a/compiler/rustc_save_analysis/src/lib.rs b/compiler/rustc_save_analysis/src/lib.rs index eed9f2eb74d..056c0b3d9d5 100644 --- a/compiler/rustc_save_analysis/src/lib.rs +++ b/compiler/rustc_save_analysis/src/lib.rs @@ -825,7 +825,7 @@ impl<'tcx> SaveContext<'tcx> { for attr in attrs { if let Some(val) = attr.doc_str() { // FIXME: Should save-analysis beautify doc strings itself or leave it to users? - result.push_str(&beautify_doc_string(val)); + result.push_str(&beautify_doc_string(val).as_str()); result.push('\n'); } else if self.tcx.sess.check_name(attr, sym::doc) { if let Some(meta_list) = attr.meta_item_list() { diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index f63a73acbf4..fbef4d06709 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -182,7 +182,7 @@ impl std::fmt::Display for FileName { use FileName::*; match *self { Real(RealFileName::Named(ref path)) => write!(fmt, "{}", path.display()), - // FIXME: might be nice to display both compoments of Devirtualized. + // FIXME: might be nice to display both components of Devirtualized. 
// But for now (to backport fix for issue #70924), best to not // perturb diagnostics so its obvious test suite still works. Real(RealFileName::Devirtualized { ref local_path, virtual_name: _ }) => { diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 4d14763825c..99a523c3f3b 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -13,7 +13,7 @@ use std::fmt; use std::hash::{Hash, Hasher}; use std::str; -use crate::{Span, DUMMY_SP, SESSION_GLOBALS}; +use crate::{Edition, Span, DUMMY_SP, SESSION_GLOBALS}; #[cfg(test)] mod tests; @@ -1609,12 +1609,32 @@ pub mod sym { } impl Symbol { - fn is_used_keyword_2018(self) -> bool { - self >= kw::Async && self <= kw::Dyn + fn is_special(self) -> bool { + self <= kw::Underscore } - fn is_unused_keyword_2018(self) -> bool { - self == kw::Try + fn is_used_keyword_always(self) -> bool { + self >= kw::As && self <= kw::While + } + + fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool { + (self >= kw::Async && self <= kw::Dyn) && edition() >= Edition::Edition2018 + } + + fn is_unused_keyword_always(self) -> bool { + self >= kw::Abstract && self <= kw::Yield + } + + fn is_unused_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool { + self == kw::Try && edition() >= Edition::Edition2018 + } + + pub fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool { + self.is_special() + || self.is_used_keyword_always() + || self.is_unused_keyword_always() + || self.is_used_keyword_conditional(edition) + || self.is_unused_keyword_conditional(edition) } /// A keyword or reserved identifier that can be used as a path segment. @@ -1642,26 +1662,27 @@ impl Ident { // Returns `true` for reserved identifiers used internally for elided lifetimes, // unnamed method parameters, crate root module, error recovery etc. pub fn is_special(self) -> bool { - self.name <= kw::Underscore + self.name.is_special() } /// Returns `true` if the token is a keyword used in the language. pub fn is_used_keyword(self) -> bool { // Note: `span.edition()` is relatively expensive, don't call it unless necessary. - self.name >= kw::As && self.name <= kw::While - || self.name.is_used_keyword_2018() && self.span.rust_2018() + self.name.is_used_keyword_always() + || self.name.is_used_keyword_conditional(|| self.span.edition()) } /// Returns `true` if the token is a keyword reserved for possible future use. pub fn is_unused_keyword(self) -> bool { // Note: `span.edition()` is relatively expensive, don't call it unless necessary. - self.name >= kw::Abstract && self.name <= kw::Yield - || self.name.is_unused_keyword_2018() && self.span.rust_2018() + self.name.is_unused_keyword_always() + || self.name.is_unused_keyword_conditional(|| self.span.edition()) } /// Returns `true` if the token is either a special identifier or a keyword. pub fn is_reserved(self) -> bool { - self.is_special() || self.is_used_keyword() || self.is_unused_keyword() + // Note: `span.edition()` is relatively expensive, don't call it unless necessary. + self.name.is_reserved(|| self.span.edition()) } /// A keyword or reserved identifier that can be used as a path segment. 
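The keyword reclassification above splits each check into an edition-independent range test and an edition-gated one, taking the edition as a closure so that the relatively expensive `span.edition()` lookup only runs when a symbol actually falls in an edition-sensitive range. A minimal standalone sketch of that pattern, with stand-in `Edition`/`Symbol` types and made-up keyword indices rather than the real `rustc_span` definitions:

```rust
// Sketch of the lazy-edition pattern: `Edition`, `Symbol`, and the
// keyword ranges below are illustrative stand-ins, not rustc_span types.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
enum Edition {
    Edition2015,
    Edition2018,
}

#[derive(Clone, Copy, PartialEq, PartialOrd)]
struct Symbol(u32);

// Hypothetical keyword indices, mirroring how the `kw::*` constants are
// laid out contiguously so classification is a pair of comparisons.
const KW_AS: Symbol = Symbol(10);
const KW_WHILE: Symbol = Symbol(40);
const KW_ASYNC: Symbol = Symbol(41);
const KW_DYN: Symbol = Symbol(45);

impl Symbol {
    fn is_used_keyword_always(self) -> bool {
        self >= KW_AS && self <= KW_WHILE
    }

    fn is_used_keyword_conditional(self, edition: impl FnOnce() -> Edition) -> bool {
        // `edition()` is evaluated only when the symbol is in the
        // edition-sensitive range, thanks to `&&` short-circuiting.
        (self >= KW_ASYNC && self <= KW_DYN) && edition() >= Edition::Edition2018
    }

    fn is_reserved(self, edition: impl Copy + FnOnce() -> Edition) -> bool {
        self.is_used_keyword_always() || self.is_used_keyword_conditional(edition)
    }
}

fn main() {
    let sym = Symbol(42); // falls in the async..dyn range
    assert!(sym.is_reserved(|| Edition::Edition2018));
    assert!(!sym.is_reserved(|| Edition::Edition2015));
}
```

Passing `impl FnOnce() -> Edition` instead of an `Edition` value keeps the fast path allocation- and lookup-free while still letting `Symbol::is_reserved` subsume the three `Ident` checks it replaces.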
@@ -1681,7 +1702,7 @@ fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T { SESSION_GLOBALS.with(|session_globals| f(&mut *session_globals.symbol_interner.lock())) } -/// An alternative to `Symbol`, useful when the chars within the symbol need to +/// An alternative to [`Symbol`], useful when the chars within the symbol need to /// be accessed. It deliberately has limited functionality and should only be /// used for temporary values. /// diff --git a/compiler/rustc_typeck/src/check/demand.rs b/compiler/rustc_typeck/src/check/demand.rs index aa4d57f7e1d..2728e03171a 100644 --- a/compiler/rustc_typeck/src/check/demand.rs +++ b/compiler/rustc_typeck/src/check/demand.rs @@ -360,10 +360,6 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { false } - fn replace_prefix(&self, s: &str, old: &str, new: &str) -> Option<String> { - s.strip_prefix(old).map(|stripped| new.to_string() + stripped) - } - /// This function is used to determine potential "simple" improvements or users' errors and /// provide them useful help. For example: /// @@ -394,6 +390,10 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { return None; } + let replace_prefix = |s: &str, old: &str, new: &str| { + s.strip_prefix(old).map(|stripped| new.to_string() + stripped) + }; + let is_struct_pat_shorthand_field = self.is_hir_id_from_struct_pattern_shorthand_field(expr.hir_id, sp); @@ -409,7 +409,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { (&ty::Str, &ty::Array(arr, _) | &ty::Slice(arr)) if arr == self.tcx.types.u8 => { if let hir::ExprKind::Lit(_) = expr.kind { if let Ok(src) = sm.span_to_snippet(sp) { - if let Some(src) = self.replace_prefix(&src, "b\"", "\"") { + if let Some(src) = replace_prefix(&src, "b\"", "\"") { return Some(( sp, "consider removing the leading `b`", @@ -423,7 +423,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { (&ty::Array(arr, _) | &ty::Slice(arr), &ty::Str) if arr == self.tcx.types.u8 => { if let hir::ExprKind::Lit(_) = expr.kind { if let Ok(src) = sm.span_to_snippet(sp) { - if let Some(src) = self.replace_prefix(&src, "\"", "b\"") { + if let Some(src) = replace_prefix(&src, "\"", "b\"") { return Some(( sp, "consider adding a leading `b`", @@ -583,23 +583,27 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { hir::Mutability::Mut => { let new_prefix = "&mut ".to_owned() + derefs; match mutbl_a { - hir::Mutability::Mut => self - .replace_prefix(&src, "&mut ", &new_prefix) - .map(|s| (s, Applicability::MachineApplicable)), - hir::Mutability::Not => self - .replace_prefix(&src, "&", &new_prefix) - .map(|s| (s, Applicability::Unspecified)), + hir::Mutability::Mut => { + replace_prefix(&src, "&mut ", &new_prefix) + .map(|s| (s, Applicability::MachineApplicable)) + } + hir::Mutability::Not => { + replace_prefix(&src, "&", &new_prefix) + .map(|s| (s, Applicability::Unspecified)) + } } } hir::Mutability::Not => { let new_prefix = "&".to_owned() + derefs; match mutbl_a { - hir::Mutability::Mut => self - .replace_prefix(&src, "&mut ", &new_prefix) - .map(|s| (s, Applicability::MachineApplicable)), - hir::Mutability::Not => self - .replace_prefix(&src, "&", &new_prefix) - .map(|s| (s, Applicability::MachineApplicable)), + hir::Mutability::Mut => { + replace_prefix(&src, "&mut ", &new_prefix) + .map(|s| (s, Applicability::MachineApplicable)) + } + hir::Mutability::Not => { + replace_prefix(&src, "&", &new_prefix) + .map(|s| (s, Applicability::MachineApplicable)) + } } } } { |
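The `replace_prefix` helper in the hunk above moves from an inherent method on `FnCtxt` to a local closure, since it touches no state on `self` and is only used within this one suggestion routine. A self-contained sketch of the helper's behavior (standalone `main`, outside the rustc context):

```rust
fn main() {
    // Rewrites a leading `old` to `new`, returning `None` when the
    // prefix does not match, so callers can fall through to other
    // suggestions.
    let replace_prefix = |s: &str, old: &str, new: &str| -> Option<String> {
        s.strip_prefix(old).map(|stripped| new.to_string() + stripped)
    };

    // e.g. suggesting `"foo"` -> `b"foo"` when a byte string is expected.
    assert_eq!(replace_prefix("\"foo\"", "\"", "b\""), Some("b\"foo\"".to_string()));
    // No match: the caller gets `None` rather than a bogus suggestion.
    assert_eq!(replace_prefix("x", "\"", "b\""), None);
}
```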

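Earlier in this diff, the new `add_edges` helper and the reworked `DepGraphQuery::new` both rely on a flattened adjacency layout: every edge target lives back-to-back in one data vector, and a per-node range indexes into it, avoiding one small allocation per node. A minimal sketch of that layout using plain `Vec`s and `(usize, usize)` ranges in place of `IndexVec` and `Range<EdgeIndex>` (the `FlatGraph` type and its methods are illustrative, not from the commit):

```rust
// Flattened edge storage: `edge_list_indices[n]` is the half-open range
// of node `n`'s outgoing edges within `edge_list_data`.
struct FlatGraph {
    edge_list_indices: Vec<(usize, usize)>,
    edge_list_data: Vec<usize>,
}

impl FlatGraph {
    fn new() -> Self {
        FlatGraph { edge_list_indices: Vec::new(), edge_list_data: Vec::new() }
    }

    // Appends one node and its edges; mirrors the shape of `add_edges`
    // in the diff above. Returns the new node's index.
    fn add_node(&mut self, targets: &[usize]) -> usize {
        let start = self.edge_list_data.len();
        self.edge_list_data.extend_from_slice(targets);
        let end = self.edge_list_data.len();
        self.edge_list_indices.push((start, end));
        self.edge_list_indices.len() - 1
    }

    // Successors are a contiguous slice: no per-node Vec, no pointer chasing.
    fn successors(&self, node: usize) -> &[usize] {
        let (start, end) = self.edge_list_indices[node];
        &self.edge_list_data[start..end]
    }
}

fn main() {
    let mut g = FlatGraph::new();
    let a = g.add_node(&[]);
    let b = g.add_node(&[a]);
    let c = g.add_node(&[a, b]);
    assert_eq!(g.successors(c), &[a, b]);
    assert_eq!(g.successors(b), &[a]);
}
```

This is the same representation `DepGraphQuery::new` now consumes directly as `(edge_list_indices, edge_list_data)`, which is why its signature changed from a slice of `(source, target)` pairs.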