diff options
77 files changed, 1304 insertions, 564 deletions
diff --git a/compiler/rustc_ast/src/expand/autodiff_attrs.rs b/compiler/rustc_ast/src/expand/autodiff_attrs.rs index ecc522ec39d..70222f4acab 100644 --- a/compiler/rustc_ast/src/expand/autodiff_attrs.rs +++ b/compiler/rustc_ast/src/expand/autodiff_attrs.rs @@ -30,14 +30,6 @@ pub enum DiffMode { Forward, /// The target function, to be created using reverse mode AD. Reverse, - /// The target function, to be created using forward mode AD. - /// This target function will also be used as a source for higher order derivatives, - /// so compute it before all Forward/Reverse targets and optimize it through llvm. - ForwardFirst, - /// The target function, to be created using reverse mode AD. - /// This target function will also be used as a source for higher order derivatives, - /// so compute it before all Forward/Reverse targets and optimize it through llvm. - ReverseFirst, } /// Dual and Duplicated (and their Only variants) are getting lowered to the same Enzyme Activity. @@ -92,10 +84,10 @@ pub struct AutoDiffAttrs { impl DiffMode { pub fn is_rev(&self) -> bool { - matches!(self, DiffMode::Reverse | DiffMode::ReverseFirst) + matches!(self, DiffMode::Reverse) } pub fn is_fwd(&self) -> bool { - matches!(self, DiffMode::Forward | DiffMode::ForwardFirst) + matches!(self, DiffMode::Forward) } } @@ -106,8 +98,6 @@ impl Display for DiffMode { DiffMode::Source => write!(f, "Source"), DiffMode::Forward => write!(f, "Forward"), DiffMode::Reverse => write!(f, "Reverse"), - DiffMode::ForwardFirst => write!(f, "ForwardFirst"), - DiffMode::ReverseFirst => write!(f, "ReverseFirst"), } } } @@ -125,12 +115,12 @@ pub fn valid_ret_activity(mode: DiffMode, activity: DiffActivity) -> bool { match mode { DiffMode::Error => false, DiffMode::Source => false, - DiffMode::Forward | DiffMode::ForwardFirst => { + DiffMode::Forward => { activity == DiffActivity::Dual || activity == DiffActivity::DualOnly || activity == DiffActivity::Const } - DiffMode::Reverse | DiffMode::ReverseFirst => { + DiffMode::Reverse => { activity == DiffActivity::Const || activity == DiffActivity::Active || activity == DiffActivity::ActiveOnly @@ -166,10 +156,10 @@ pub fn valid_input_activity(mode: DiffMode, activity: DiffActivity) -> bool { return match mode { DiffMode::Error => false, DiffMode::Source => false, - DiffMode::Forward | DiffMode::ForwardFirst => { + DiffMode::Forward => { matches!(activity, Dual | DualOnly | Const) } - DiffMode::Reverse | DiffMode::ReverseFirst => { + DiffMode::Reverse => { matches!(activity, Active | ActiveOnly | Duplicated | DuplicatedOnly | Const) } }; @@ -200,8 +190,6 @@ impl FromStr for DiffMode { "Source" => Ok(DiffMode::Source), "Forward" => Ok(DiffMode::Forward), "Reverse" => Ok(DiffMode::Reverse), - "ForwardFirst" => Ok(DiffMode::ForwardFirst), - "ReverseFirst" => Ok(DiffMode::ReverseFirst), _ => Err(()), } } diff --git a/compiler/rustc_codegen_cranelift/patches/0029-stdlib-Disable-f16-and-f128-in-compiler-builtins.patch b/compiler/rustc_codegen_cranelift/patches/0029-stdlib-Disable-f16-and-f128-in-compiler-builtins.patch index bac411d1eb0..e3a9512dda9 100644 --- a/compiler/rustc_codegen_cranelift/patches/0029-stdlib-Disable-f16-and-f128-in-compiler-builtins.patch +++ b/compiler/rustc_codegen_cranelift/patches/0029-stdlib-Disable-f16-and-f128-in-compiler-builtins.patch @@ -16,8 +16,8 @@ index 7165c3e48af..968552ad435 100644 [dependencies] core = { path = "../core" } --compiler_builtins = { version = "=0.1.145", features = ['rustc-dep-of-std'] } -+compiler_builtins = { version = "=0.1.145", features = ['rustc-dep-of-std', 'no-f16-f128'] } +-compiler_builtins = { version = "=0.1.146", features = ['rustc-dep-of-std'] } ++compiler_builtins = { version = "=0.1.146", features = ['rustc-dep-of-std', 'no-f16-f128'] } [dev-dependencies] rand = { version = "0.8.5", default-features = false, features = ["alloc"] } diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 78c759bbe8c..8bad437eeb7 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -606,10 +606,31 @@ pub(crate) fn run_pass_manager( // If this rustc version was build with enzyme/autodiff enabled, and if users applied the // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time. - let first_run = true; debug!("running llvm pm opt pipeline"); unsafe { - write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, first_run)?; + write::llvm_optimize( + cgcx, + dcx, + module, + config, + opt_level, + opt_stage, + write::AutodiffStage::DuringAD, + )?; + } + // FIXME(ZuseZ4): Make this more granular + if cfg!(llvm_enzyme) && !thin { + unsafe { + write::llvm_optimize( + cgcx, + dcx, + module, + config, + opt_level, + llvm::OptStage::FatLTO, + write::AutodiffStage::PostAD, + )?; + } } debug!("lto done"); Ok(()) diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 4706744f353..ae4c4d5876e 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -530,6 +530,16 @@ fn get_instr_profile_output_path(config: &ModuleConfig) -> Option<CString> { config.instrument_coverage.then(|| c"default_%m_%p.profraw".to_owned()) } +// PreAD will run llvm opts but disable size increasing opts (vectorization, loop unrolling) +// DuringAD is the same as above, but also runs the enzyme opt and autodiff passes. +// PostAD will run all opts, including size increasing opts. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum AutodiffStage { + PreAD, + DuringAD, + PostAD, +} + pub(crate) unsafe fn llvm_optimize( cgcx: &CodegenContext<LlvmCodegenBackend>, dcx: DiagCtxtHandle<'_>, @@ -537,7 +547,7 @@ pub(crate) unsafe fn llvm_optimize( config: &ModuleConfig, opt_level: config::OptLevel, opt_stage: llvm::OptStage, - skip_size_increasing_opts: bool, + autodiff_stage: AutodiffStage, ) -> Result<(), FatalError> { // Enzyme: // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized @@ -550,12 +560,16 @@ pub(crate) unsafe fn llvm_optimize( let unroll_loops; let vectorize_slp; let vectorize_loop; + let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD; // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing - // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly, + // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt). + // We therefore have two calls to llvm_optimize, if autodiff is used. + // + // FIXME(ZuseZ4): Before shipping on nightly, // we should make this more granular, or at least check that the user has at least one autodiff // call in their code, to justify altering the compilation pipeline. - if skip_size_increasing_opts && cfg!(llvm_enzyme) { + if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD { unroll_loops = false; vectorize_slp = false; vectorize_loop = false; @@ -565,7 +579,7 @@ pub(crate) unsafe fn llvm_optimize( vectorize_slp = config.vectorize_slp; vectorize_loop = config.vectorize_loop; } - trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop); + trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme); let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); let pgo_gen_path = get_pgo_gen_path(config); let pgo_use_path = get_pgo_use_path(config); @@ -633,6 +647,7 @@ pub(crate) unsafe fn llvm_optimize( vectorize_loop, config.no_builtins, config.emit_lifetime_markers, + run_enzyme, sanitizer_options.as_ref(), pgo_gen_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()), pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()), @@ -684,18 +699,14 @@ pub(crate) unsafe fn optimize( _ => llvm::OptStage::PreLinkNoLTO, }; - // If we know that we will later run AD, then we disable vectorization and loop unrolling - let skip_size_increasing_opts = cfg!(llvm_enzyme); + // If we know that we will later run AD, then we disable vectorization and loop unrolling. + // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD). + // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff + // usages, not just if we build rustc with autodiff support. + let autodiff_stage = + if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD }; return unsafe { - llvm_optimize( - cgcx, - dcx, - module, - config, - opt_level, - opt_stage, - skip_size_increasing_opts, - ) + llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage) }; } Ok(()) diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index dd5e726160d..b2c1088e3fc 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -4,10 +4,9 @@ use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivit use rustc_codegen_ssa::ModuleCodegen; use rustc_codegen_ssa::back::write::ModuleConfig; use rustc_errors::FatalError; -use rustc_session::config::Lto; use tracing::{debug, trace}; -use crate::back::write::{llvm_err, llvm_optimize}; +use crate::back::write::llvm_err; use crate::builder::SBuilder; use crate::context::SimpleCx; use crate::declare::declare_simple_fn; @@ -53,8 +52,6 @@ fn generate_enzyme_call<'ll>( let mut ad_name: String = match attrs.mode { DiffMode::Forward => "__enzyme_fwddiff", DiffMode::Reverse => "__enzyme_autodiff", - DiffMode::ForwardFirst => "__enzyme_fwddiff", - DiffMode::ReverseFirst => "__enzyme_autodiff", _ => panic!("logic bug in autodiff, unrecognized mode"), } .to_string(); @@ -153,7 +150,7 @@ fn generate_enzyme_call<'ll>( _ => {} } - trace!("matching autodiff arguments"); + debug!("matching autodiff arguments"); // We now handle the issue that Rust level arguments not always match the llvm-ir level // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on // llvm-ir level. The number of activities matches the number of Rust level arguments, so we @@ -164,10 +161,10 @@ fn generate_enzyme_call<'ll>( let mut activity_pos = 0; let outer_args: Vec<&llvm::Value> = get_params(outer_fn); while activity_pos < inputs.len() { - let activity = inputs[activity_pos as usize]; + let diff_activity = inputs[activity_pos as usize]; // Duplicated arguments received a shadow argument, into which enzyme will write the // gradient. - let (activity, duplicated): (&Metadata, bool) = match activity { + let (activity, duplicated): (&Metadata, bool) = match diff_activity { DiffActivity::None => panic!("not a valid input activity"), DiffActivity::Const => (enzyme_const, false), DiffActivity::Active => (enzyme_out, false), @@ -222,7 +219,15 @@ fn generate_enzyme_call<'ll>( // A duplicated pointer will have the following two outer_fn arguments: // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call: // (..., metadata! enzyme_dup, ptr, ptr, ...). - assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer); + if matches!( + diff_activity, + DiffActivity::Duplicated | DiffActivity::DuplicatedOnly + ) { + assert!( + llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer + ); + } + // In the case of Dual we don't have assumptions, e.g. f32 would be valid. args.push(next_outer_arg); outer_pos += 2; activity_pos += 1; @@ -277,7 +282,7 @@ pub(crate) fn differentiate<'ll>( module: &'ll ModuleCodegen<ModuleLlvm>, cgcx: &CodegenContext<LlvmCodegenBackend>, diff_items: Vec<AutoDiffItem>, - config: &ModuleConfig, + _config: &ModuleConfig, ) -> Result<(), FatalError> { for item in &diff_items { trace!("{}", item); @@ -318,29 +323,6 @@ pub(crate) fn differentiate<'ll>( // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts - if let Some(opt_level) = config.opt_level { - let opt_stage = match cgcx.lto { - Lto::Fat => llvm::OptStage::PreLinkFatLTO, - Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO, - _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO, - _ => llvm::OptStage::PreLinkNoLTO, - }; - // This is our second opt call, so now we run all opts, - // to make sure we get the best performance. - let skip_size_increasing_opts = false; - trace!("running Module Optimization after differentiation"); - unsafe { - llvm_optimize( - cgcx, - diag_handler.handle(), - module, - config, - opt_level, - opt_stage, - skip_size_increasing_opts, - )? - }; - } trace!("done with differentiate()"); Ok(()) diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 50a40c9c309..4d6a76b23ea 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2382,6 +2382,7 @@ unsafe extern "C" { LoopVectorize: bool, DisableSimplifyLibCalls: bool, EmitLifetimeMarkers: bool, + RunEnzyme: bool, SanitizerOptions: Option<&SanitizerOptions>, PGOGenPath: *const c_char, PGOUsePath: *const c_char, diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index 7acdbd19993..2c38fb5658f 100644 --- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -914,8 +914,6 @@ fn autodiff_attrs(tcx: TyCtxt<'_>, id: DefId) -> Option<AutoDiffAttrs> { let mode = match mode.as_str() { "Forward" => DiffMode::Forward, "Reverse" => DiffMode::Reverse, - "ForwardFirst" => DiffMode::ForwardFirst, - "ReverseFirst" => DiffMode::ReverseFirst, _ => { span_bug!(mode.span, "rustc_autodiff attribute contains invalid mode"); } diff --git a/compiler/rustc_llvm/build.rs b/compiler/rustc_llvm/build.rs index d9d28299413..48806888b43 100644 --- a/compiler/rustc_llvm/build.rs +++ b/compiler/rustc_llvm/build.rs @@ -193,6 +193,10 @@ fn main() { cfg.define(&flag, None); } + if tracked_env_var_os("LLVM_ENZYME").is_some() { + cfg.define("ENZYME", None); + } + if tracked_env_var_os("LLVM_RUSTLLVM").is_some() { cfg.define("LLVM_RUSTLLVM", None); } diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 6447a9362b3..a6b2384f2d7 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -688,14 +688,20 @@ struct LLVMRustSanitizerOptions { bool SanitizeKernelAddressRecover; }; +// This symbol won't be available or used when Enzyme is not enabled +#ifdef ENZYME +extern "C" void registerEnzyme(llvm::PassBuilder &PB); +#endif + extern "C" LLVMRustResult LLVMRustOptimize( LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef, LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage, bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR, bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops, bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls, - bool EmitLifetimeMarkers, LLVMRustSanitizerOptions *SanitizerOptions, - const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage, + bool EmitLifetimeMarkers, bool RunEnzyme, + LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath, + const char *PGOUsePath, bool InstrumentCoverage, const char *InstrProfileOutput, const char *PGOSampleUsePath, bool DebugInfoForProfiling, void *LlvmSelfProfiler, LLVMRustSelfProfileBeforePassCallback BeforePassCallback, @@ -1010,6 +1016,18 @@ extern "C" LLVMRustResult LLVMRustOptimize( MPM.addPass(NameAnonGlobalPass()); } + // now load "-enzyme" pass: +#ifdef ENZYME + if (RunEnzyme) { + registerEnzyme(PB); + if (auto Err = PB.parsePassPipeline(MPM, "enzyme")) { + std::string ErrMsg = toString(std::move(Err)); + LLVMRustSetLastError(ErrMsg.c_str()); + return LLVMRustResult::Failure; + } + } +#endif + // Upgrade all calls to old intrinsics first. for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;) UpgradeCallsToIntrinsic(&*I++); // must be post-increment, as we remove diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 2566081cf5d..a9b4593c13d 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1301,9 +1301,6 @@ pub struct TyCtxt<'tcx> { gcx: &'tcx GlobalCtxt<'tcx>, } -// Explicitly implement `DynSync` and `DynSend` for `TyCtxt` to short circuit trait resolution. -unsafe impl DynSend for TyCtxt<'_> {} -unsafe impl DynSync for TyCtxt<'_> {} fn _assert_tcx_fields() { sync::assert_dyn_sync::<&'_ GlobalCtxt<'_>>(); sync::assert_dyn_send::<&'_ GlobalCtxt<'_>>(); diff --git a/compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs b/compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs index c1ed565f0fe..736b9131022 100644 --- a/compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs +++ b/compiler/rustc_target/src/spec/targets/i686_unknown_uefi.rs @@ -5,7 +5,7 @@ // The cdecl ABI is used. It differs from the stdcall or fastcall ABI. // "i686-unknown-windows" is used to get the minimal subset of windows-specific features. -use crate::spec::{RustcAbi, Target, base}; +use crate::spec::{LinkerFlavor, Lld, RustcAbi, Target, add_link_args, base}; pub(crate) fn target() -> Target { let mut base = base::uefi_msvc::opts(); @@ -24,6 +24,13 @@ pub(crate) fn target() -> Target { base.features = "-mmx,-sse,+soft-float".into(); base.rustc_abi = Some(RustcAbi::X86Softfloat); + // Turn off DWARF. This fixes an lld warning, "section name .debug_frame is longer than 8 + // characters and will use a non-standard string table". That section will not be created if + // DWARF is disabled. + // + // This is only needed in the i686 target due to using the `-gnu` LLVM target (see below). + add_link_args(&mut base.post_link_args, LinkerFlavor::Msvc(Lld::No), &["/DEBUG:NODWARF"]); + // Use -GNU here, because of the reason below: // Background and Problem: // If we use i686-unknown-windows, the LLVM IA32 MSVC generates compiler intrinsic diff --git a/library/Cargo.lock b/library/Cargo.lock index 8b78908e6d7..930db2ecd57 100644 --- a/library/Cargo.lock +++ b/library/Cargo.lock @@ -61,9 +61,9 @@ dependencies = [ [[package]] name = "compiler_builtins" -version = "0.1.145" +version = "0.1.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0705f5abaaab7168ccc14f8f340ded61be2bd3ebea86b9834b6acbc8495de8" +checksum = "a97117b1434b79833f39a5fabdf82f890bd98c1988334dea1cb67f7e627fa311" dependencies = [ "cc", "rustc-std-workspace-core", diff --git a/library/alloc/Cargo.toml b/library/alloc/Cargo.toml index db7eaf52fb2..6b0b5761391 100644 --- a/library/alloc/Cargo.toml +++ b/library/alloc/Cargo.toml @@ -10,7 +10,7 @@ edition = "2021" [dependencies] core = { path = "../core" } -compiler_builtins = { version = "=0.1.145", features = ['rustc-dep-of-std'] } +compiler_builtins = { version = "=0.1.146", features = ['rustc-dep-of-std'] } [dev-dependencies] rand = { version = "0.8.5", default-features = false, features = ["alloc"] } diff --git a/library/portable-simd/.github/workflows/ci.yml b/library/portable-simd/.github/workflows/ci.yml index b292be2d6f9..3984d8f0d8d 100644 --- a/library/portable-simd/.github/workflows/ci.yml +++ b/library/portable-simd/.github/workflows/ci.yml @@ -9,6 +9,7 @@ on: env: CARGO_NET_RETRY: 10 RUSTUP_MAX_RETRIES: 10 + PROPTEST_CASES: 64 jobs: rustfmt: @@ -16,12 +17,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup component add rustfmt + - uses: actions/checkout@v4 - name: Run rustfmt run: cargo fmt --all -- --check @@ -37,7 +33,9 @@ jobs: - i686-unknown-linux-gnu - i586-unknown-linux-gnu - aarch64-unknown-linux-gnu + - arm64ec-pc-windows-msvc - armv7-unknown-linux-gnueabihf + - loongarch64-unknown-linux-gnu # non-nightly since https://github.com/rust-lang/rust/pull/113274 # - mips-unknown-linux-gnu # - mips64-unknown-linux-gnuabi64 @@ -49,13 +47,9 @@ jobs: - wasm32-unknown-unknown steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup target add ${{ matrix.target }} - rustup component add clippy + run: rustup target add ${{ matrix.target }} - name: Run Clippy run: cargo clippy --all-targets --target ${{ matrix.target }} @@ -65,26 +59,19 @@ jobs: strategy: fail-fast: false matrix: - target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu, x86_64-apple-darwin] + target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu] # `default` means we use the default target config for the target, # `native` means we run with `-Ctarget-cpu=native`, and anything else is # an arg to `-Ctarget-feature` target_feature: [default, native, +sse3, +ssse3, +sse4.1, +sse4.2, +avx, +avx2] exclude: - # The macos runners seem to only reliably support up to `avx`. - - { target: x86_64-apple-darwin, target_feature: +avx2 } - # These features are statically known to be present for all 64 bit - # macs, and thus are covered by the `default` test - - { target: x86_64-apple-darwin, target_feature: +sse3 } - - { target: x86_64-apple-darwin, target_feature: +ssse3 } # -Ctarget-cpu=native sounds like bad-news if target != host - { target: i686-pc-windows-msvc, target_feature: native } - { target: i586-pc-windows-msvc, target_feature: native } include: # Populate the `matrix.os` field - - { target: x86_64-apple-darwin, os: macos-latest } - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest } - { target: x86_64-pc-windows-msvc, os: windows-latest } - { target: i686-pc-windows-msvc, os: windows-latest } @@ -98,12 +85,9 @@ jobs: # avx512vl, but occasionally doesn't. Maybe one day we can enable it. steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup target add ${{ matrix.target }} + run: rustup target add ${{ matrix.target }} - name: Configure RUSTFLAGS shell: bash @@ -145,6 +129,35 @@ jobs: run: cargo doc --verbose --target=${{ matrix.target }} env: RUSTDOCFLAGS: -Dwarnings + + macos-tests: + name: ${{ matrix.target }} + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + target: + - aarch64-apple-darwin + - x86_64-apple-darwin + steps: + - uses: actions/checkout@v4 + - name: Setup Rust + run: rustup target add ${{ matrix.target }} + + - name: Configure RUSTFLAGS + shell: bash + run: echo "RUSTFLAGS=-Dwarnings" >> $GITHUB_ENV + + - name: Test (debug) + run: cargo test --verbose --target=${{ matrix.target }} + + - name: Test (release) + run: cargo test --verbose --target=${{ matrix.target }} --release + + - name: Generate docs + run: cargo doc --verbose --target=${{ matrix.target }} + env: + RUSTDOCFLAGS: -Dwarnings wasm-tests: name: "wasm (firefox, ${{ matrix.name }})" @@ -155,11 +168,7 @@ jobs: - { name: default, RUSTFLAGS: "" } - { name: simd128, RUSTFLAGS: "-C target-feature=+simd128" } steps: - - uses: actions/checkout@v2 - - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly + - uses: actions/checkout@v4 - name: Install wasm-pack run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - name: Test (debug) @@ -174,6 +183,8 @@ jobs: cross-tests: name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)" runs-on: ubuntu-latest + env: + PROPTEST_CASES: 16 strategy: fail-fast: false @@ -185,6 +196,7 @@ jobs: - powerpc-unknown-linux-gnu - powerpc64le-unknown-linux-gnu # includes altivec by default - riscv64gc-unknown-linux-gnu + - loongarch64-unknown-linux-gnu # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing # non-nightly since https://github.com/rust-lang/rust/pull/113274 # - mips-unknown-linux-gnu @@ -201,24 +213,14 @@ jobs: # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" } steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - rustup target add ${{ matrix.target }} - rustup component add rust-src + run: rustup target add ${{ matrix.target }} - name: Install Cross - # Equivalent to `cargo install cross`, but downloading a prebuilt - # binary. Ideally we wouldn't hardcode a version, but the version number - # being part of the tarball means we can't just use the download/latest - # URL :( + # Install the latest git version for newer targets. run: | - CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz - mkdir -p "$HOME/.bin" - curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin" - echo "$HOME/.bin" >> $GITHUB_PATH + cargo install cross --git https://github.com/cross-rs/cross --rev 4090beca3cfffa44371a5bba524de3a578aa46c3 - name: Configure Emulated CPUs run: | @@ -242,34 +244,11 @@ jobs: - name: Test (release) run: cross test --verbose --target=${{ matrix.target }} --release - features: - name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})" + miri: runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - simd: - - "" - - "avx512" - features: - - "" - - "--features std" - - "--features all_lane_counts" - - "--all-features" - + env: + PROPTEST_CASES: 16 steps: - - uses: actions/checkout@v2 - - name: Setup Rust - run: | - rustup update nightly --no-self-update - rustup default nightly - - name: Detect AVX512 - run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV - - name: Check build - if: ${{ matrix.simd == '' }} - run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }} - - name: Check AVX - if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }} - run: | - echo "Found AVX features: $CPU_FEATURE" - RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }} + - uses: actions/checkout@v4 + - name: Test (Miri) + run: cargo miri test diff --git a/library/portable-simd/.github/workflows/doc.yml b/library/portable-simd/.github/workflows/doc.yml index 9d1fa66ccb5..22c2cb3f67f 100644 --- a/library/portable-simd/.github/workflows/doc.yml +++ b/library/portable-simd/.github/workflows/doc.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v1 + uses: actions/checkout@v4 - name: Setup Rust run: | diff --git a/library/portable-simd/.gitignore b/library/portable-simd/.gitignore index ea8c4bf7f35..9673e52dcad 100644 --- a/library/portable-simd/.gitignore +++ b/library/portable-simd/.gitignore @@ -1 +1,2 @@ /target +git-subtree.sh diff --git a/library/portable-simd/Cargo.toml b/library/portable-simd/Cargo.toml index d1732aaec2f..21d4584a9f4 100644 --- a/library/portable-simd/Cargo.toml +++ b/library/portable-simd/Cargo.toml @@ -5,3 +5,9 @@ members = [ "crates/std_float", "crates/test_helpers", ] + +[profile.test.package."*"] +opt-level = 2 + +[profile.test.package.test_helpers] +opt-level = 2 diff --git a/library/portable-simd/Cross.toml b/library/portable-simd/Cross.toml new file mode 100644 index 00000000000..d21e76b92dd --- /dev/null +++ b/library/portable-simd/Cross.toml @@ -0,0 +1,2 @@ +[build.env] +passthrough = ["PROPTEST_CASES"] diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml index b4a8fd70f4c..a7a6d43b11d 100644 --- a/library/portable-simd/crates/core_simd/Cargo.toml +++ b/library/portable-simd/crates/core_simd/Cargo.toml @@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"] license = "MIT OR Apache-2.0" [features] -default = ["as_crate"] +default = ["as_crate", "std"] as_crate = [] std = [] -all_lane_counts = [] [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen = "0.2" diff --git a/library/portable-simd/crates/core_simd/src/lane_count.rs b/library/portable-simd/crates/core_simd/src/lane_count.rs index 4cd7265ed67..280b27bc9bc 100644 --- a/library/portable-simd/crates/core_simd/src/lane_count.rs +++ b/library/portable-simd/crates/core_simd/src/lane_count.rs @@ -33,10 +33,8 @@ macro_rules! supported_lane_count { }; } -supported_lane_count!(1, 2, 4, 8, 16, 32, 64); -#[cfg(feature = "all_lane_counts")] supported_lane_count!( - 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63 + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64 ); diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs index 992a7705e3c..7f57847c9c2 100644 --- a/library/portable-simd/crates/core_simd/src/lib.rs +++ b/library/portable-simd/crates/core_simd/src/lib.rs @@ -1,7 +1,6 @@ #![no_std] #![feature( - const_refs_to_cell, - const_mut_refs, + const_eval_select, convert_float_to_int, core_intrinsics, decl_macro, @@ -26,6 +25,7 @@ all(target_arch = "arm", target_feature = "v7"), feature(stdarch_arm_neon_intrinsics) )] +#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))] #![cfg_attr( any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs index 04de3a96827..b763a7c75a5 100644 --- a/library/portable-simd/crates/core_simd/src/masks.rs +++ b/library/portable-simd/crates/core_simd/src/masks.rs @@ -308,48 +308,6 @@ where Self(mask_impl::Mask::from_bitmask_integer(bitmask)) } - /// Creates a bitmask vector from a mask. - /// - /// Each bit is set if the corresponding element in the mask is `true`. - /// The remaining bits are unset. - /// - /// The bits are packed into the first N bits of the vector: - /// ``` - /// # #![feature(portable_simd)] - /// # #[cfg(feature = "as_crate")] use core_simd::simd; - /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::mask32x8; - /// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]); - /// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101); - /// ``` - #[inline] - #[must_use = "method returns a new integer and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd<u8, N> { - self.0.to_bitmask_vector() - } - - /// Creates a mask from a bitmask vector. - /// - /// For each bit, if it is set, the corresponding element in the mask is set to `true`. - /// - /// The bits are packed into the first N bits of the vector: - /// ``` - /// # #![feature(portable_simd)] - /// # #[cfg(feature = "as_crate")] use core_simd::simd; - /// # #[cfg(not(feature = "as_crate"))] use core::simd; - /// # use simd::{mask32x8, u8x8}; - /// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]); - /// assert_eq!( - /// mask32x8::from_bitmask_vector(bitmask), - /// mask32x8::from_array([true, false, true, false, false, false, true, false]), - /// ); - /// ``` - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self { - Self(mask_impl::Mask::from_bitmask_vector(bitmask)) - } - /// Finds the index of the first set element. /// /// ``` diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs index 96c553426ee..db4312d5bf8 100644 --- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs +++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs @@ -123,23 +123,6 @@ where } #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd<u8, N> { - let mut bitmask = Simd::splat(0); - bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); - bitmask - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self { - let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default(); - let len = bytes.as_ref().len(); - bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]); - Self(bytes, PhantomData) - } - - #[inline] pub fn to_bitmask_integer(self) -> u64 { let mut bitmask = [0u8; 8]; bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs index 87f031a9f36..2d01946b574 100644 --- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs +++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs @@ -141,62 +141,6 @@ where } #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_bitmask_vector(self) -> Simd<u8, N> { - let mut bitmask = Simd::splat(0); - - // Safety: Bytes is the right size array - unsafe { - // Compute the bitmask - let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask = - core::intrinsics::simd::simd_bitmask(self.0); - - // LLVM assumes bit order should match endianness - if cfg!(target_endian = "big") { - for x in bytes.as_mut() { - *x = x.reverse_bits() - } - if N % 8 > 0 { - bytes.as_mut()[N / 8] >>= 8 - N % 8; - } - } - - bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref()); - } - - bitmask - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self { - let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default(); - - // Safety: Bytes is the right size array - unsafe { - let len = bytes.as_ref().len(); - bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]); - - // LLVM assumes bit order should match endianness - if cfg!(target_endian = "big") { - for x in bytes.as_mut() { - *x = x.reverse_bits(); - } - if N % 8 > 0 { - bytes.as_mut()[N / 8] >>= 8 - N % 8; - } - } - - // Compute the regular mask - Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask( - bytes, - Self::splat(true).to_int(), - Self::splat(false).to_int(), - )) - } - } - - #[inline] unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U where LaneCount<M>: SupportedLaneCount, @@ -283,7 +227,7 @@ where } #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] + #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { // Safety: use `self` as an integer vector unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) } diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs index dd7303a97b1..d3bd14a3402 100644 --- a/library/portable-simd/crates/core_simd/src/ops.rs +++ b/library/portable-simd/crates/core_simd/src/ops.rs @@ -77,7 +77,7 @@ macro_rules! int_divrem_guard { ( $lhs:ident, $rhs:ident, { const PANIC_ZERO: &'static str = $zero:literal; - $simd_call:ident + $simd_call:ident, $op:tt }, $int:ident ) => { if $rhs.simd_eq(Simd::splat(0 as _)).any() { @@ -96,8 +96,23 @@ macro_rules! int_divrem_guard { // Nice base case to make it easy to const-fold away the other branch. $rhs }; - // Safety: $lhs and rhs are vectors - unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) } + + // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two + // these operations aren't vectorized on aarch64 anyway + #[cfg(target_arch = "aarch64")] + { + let mut out = Simd::splat(0 as _); + for i in 0..Self::LEN { + out[i] = $lhs[i] $op rhs[i]; + } + out + } + + #[cfg(not(target_arch = "aarch64"))] + { + // Safety: $lhs and rhs are vectors + unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) } + } } }; } @@ -205,14 +220,14 @@ for_base_ops! { impl Div::div { int_divrem_guard { const PANIC_ZERO: &'static str = "attempt to divide by zero"; - simd_div + simd_div, / } } impl Rem::rem { int_divrem_guard { const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero"; - simd_rem + simd_rem, % } } diff --git a/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs index 5b4615ce51d..93989ce91b8 100644 --- a/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs +++ b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs @@ -12,7 +12,7 @@ pub trait SimdPartialEq { #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_eq(self, other: Self) -> Self::Mask; - /// Test if each element is equal to the corresponding element in `other`. + /// Test if each element is not equal to the corresponding element in `other`. #[must_use = "method returns a new mask and does not mutate the original value"] fn simd_ne(self, other: Self) -> Self::Mask; } diff --git a/library/portable-simd/crates/core_simd/src/simd/num/float.rs b/library/portable-simd/crates/core_simd/src/simd/num/float.rs index 59e43851ea8..79954b937b3 100644 --- a/library/portable-simd/crates/core_simd/src/simd/num/float.rs +++ b/library/portable-simd/crates/core_simd/src/simd/num/float.rs @@ -255,6 +255,7 @@ macro_rules! impl_trait { type Bits = Simd<$bits_ty, N>; type Cast<T: SimdElement> = Simd<T, N>; + #[cfg(not(target_arch = "aarch64"))] #[inline] fn cast<T: SimdCast>(self) -> Self::Cast<T> { @@ -262,6 +263,33 @@ macro_rules! impl_trait { unsafe { core::intrinsics::simd::simd_as(self) } } + // https://github.com/llvm/llvm-project/issues/94694 + #[cfg(target_arch = "aarch64")] + #[inline] + fn cast<T: SimdCast>(self) -> Self::Cast<T> + { + const { assert!(N <= 64) }; + if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 { + // Safety: supported types are guaranteed by SimdCast + unsafe { core::intrinsics::simd::simd_as(self) } + } else if N < 4 { + let x = self.resize::<4>(Default::default()).cast(); + x.resize::<N>(x[0]) + } else if N < 8 { + let x = self.resize::<8>(Default::default()).cast(); + x.resize::<N>(x[0]) + } else if N < 16 { + let x = self.resize::<16>(Default::default()).cast(); + x.resize::<N>(x[0]) + } else if N < 32 { + let x = self.resize::<32>(Default::default()).cast(); + x.resize::<N>(x[0]) + } else { + let x = self.resize::<64>(Default::default()).cast(); + x.resize::<N>(x[0]) + } + } + #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I> @@ -391,7 +419,7 @@ macro_rules! impl_trait { self.as_array().iter().sum() } else { // Safety: `self` is a float vector - unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) } + unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, -0.) } } } diff --git a/library/portable-simd/crates/core_simd/src/simd/num/int.rs b/library/portable-simd/crates/core_simd/src/simd/num/int.rs index d7598d9ceaf..3a51235ff95 100644 --- a/library/portable-simd/crates/core_simd/src/simd/num/int.rs +++ b/library/portable-simd/crates/core_simd/src/simd/num/int.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, + cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, }; @@ -70,11 +70,27 @@ pub trait SimdInt: Copy + Sealed { /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::prelude::*; /// use core::i32::{MIN, MAX}; - /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]); + /// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]); /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0])); /// ``` fn abs(self) -> Self; + /// Lanewise absolute difference. + /// Every element becomes the absolute difference of `self` and `second`. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::prelude::*; + /// use core::i32::{MIN, MAX}; + /// let a = Simd::from_array([MIN, MAX, 100, -100]); + /// let b = Simd::from_array([MAX, MIN, -80, -120]); + /// assert_eq!(a.abs_diff(b), Simd::from_array([u32::MAX, u32::MAX, 180, 20])); + /// ``` + fn abs_diff(self, second: Self) -> Self::Unsigned; + /// Lanewise saturating absolute value, implemented in Rust. /// As abs(), except the MIN value becomes MAX instead of itself. /// @@ -203,6 +219,12 @@ pub trait SimdInt: Copy + Sealed { /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc. fn reverse_bits(self) -> Self; + /// Returns the number of ones in the binary representation of each element. + fn count_ones(self) -> Self::Unsigned; + + /// Returns the number of zeros in the binary representation of each element. + fn count_zeros(self) -> Self::Unsigned; + /// Returns the number of leading zeros in the binary representation of each element. fn leading_zeros(self) -> Self::Unsigned; @@ -260,6 +282,13 @@ macro_rules! impl_trait { } #[inline] + fn abs_diff(self, second: Self) -> Self::Unsigned { + let max = self.simd_max(second); + let min = self.simd_min(second); + (max - min).cast() + } + + #[inline] fn saturating_abs(self) -> Self { // arith shift for -1 or 0 mask based on sign bit, giving 2s complement const SHR: $ty = <$ty>::BITS as $ty - 1; @@ -345,6 +374,16 @@ macro_rules! impl_trait { } #[inline] + fn count_ones(self) -> Self::Unsigned { + self.cast::<$unsigned>().count_ones() + } + + #[inline] + fn count_zeros(self) -> Self::Unsigned { + self.cast::<$unsigned>().count_zeros() + } + + #[inline] fn leading_zeros(self) -> Self::Unsigned { self.cast::<$unsigned>().leading_zeros() } diff --git a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs index 53dd97f501c..1ab2d8c7b73 100644 --- a/library/portable-simd/crates/core_simd/src/simd/num/uint.rs +++ b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{cmp::SimdOrd, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { @@ -57,6 +57,22 @@ pub trait SimdUint: Copy + Sealed { /// assert_eq!(sat, Simd::splat(0)); fn saturating_sub(self, second: Self) -> Self; + /// Lanewise absolute difference. + /// Every element becomes the absolute difference of `self` and `second`. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::prelude::*; + /// use core::u32::MAX; + /// let a = Simd::from_array([0, MAX, 100, 20]); + /// let b = Simd::from_array([MAX, 0, 80, 200]); + /// assert_eq!(a.abs_diff(b), Simd::from_array([MAX, MAX, 20, 180])); + /// ``` + fn abs_diff(self, second: Self) -> Self; + /// Returns the sum of the elements of the vector, with wrapping addition. fn reduce_sum(self) -> Self::Scalar; @@ -85,6 +101,12 @@ pub trait SimdUint: Copy + Sealed { /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc. fn reverse_bits(self) -> Self; + /// Returns the number of ones in the binary representation of each element. + fn count_ones(self) -> Self; + + /// Returns the number of zeros in the binary representation of each element. + fn count_zeros(self) -> Self; + /// Returns the number of leading zeros in the binary representation of each element. fn leading_zeros(self) -> Self; @@ -139,6 +161,13 @@ macro_rules! impl_trait { } #[inline] + fn abs_diff(self, second: Self) -> Self { + let max = self.simd_max(second); + let min = self.simd_min(second); + max - min + } + + #[inline] fn reduce_sum(self) -> Self::Scalar { // Safety: `self` is an integer vector unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0) } @@ -193,6 +222,17 @@ macro_rules! impl_trait { } #[inline] + fn count_ones(self) -> Self { + // Safety: `self` is an integer vector + unsafe { core::intrinsics::simd::simd_ctpop(self) } + } + + #[inline] + fn count_zeros(self) -> Self { + (!self).count_ones() + } + + #[inline] fn leading_zeros(self) -> Self { // Safety: `self` is an integer vector unsafe { core::intrinsics::simd::simd_ctlz(self) } diff --git a/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs index be635ea640b..47383809ffb 100644 --- a/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -42,6 +42,19 @@ pub trait SimdConstPtr: Copy + Sealed { /// Equivalent to calling [`pointer::addr`] on each element. fn addr(self) -> Self::Usize; + /// Converts an address to a pointer without giving it any provenance. + /// + /// Without provenance, this pointer is not associated with any actual allocation. Such a + /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but + /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers + /// are little more than a usize address in disguise. + /// + /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a + /// previously exposed provenance. + /// + /// Equivalent to calling [`core::ptr::without_provenance`] on each element. + fn without_provenance(addr: Self::Usize) -> Self; + /// Creates a new pointer with the given address. /// /// This performs the same operation as a cast, but copies the *address-space* and @@ -119,6 +132,14 @@ where } #[inline] + fn without_provenance(addr: Self::Usize) -> Self { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any + // provenance). + unsafe { core::mem::transmute_copy(&addr) } + } + + #[inline] fn with_addr(self, addr: Self::Usize) -> Self { // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // diff --git a/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs index f6823a949e3..3f20eef21a3 100644 --- a/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -39,6 +39,19 @@ pub trait SimdMutPtr: Copy + Sealed { /// Equivalent to calling [`pointer::addr`] on each element. fn addr(self) -> Self::Usize; + /// Converts an address to a pointer without giving it any provenance. + /// + /// Without provenance, this pointer is not associated with any actual allocation. Such a + /// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but + /// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers + /// are little more than a usize address in disguise. + /// + /// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a + /// previously exposed provenance. + /// + /// Equivalent to calling [`core::ptr::without_provenance`] on each element. + fn without_provenance(addr: Self::Usize) -> Self; + /// Creates a new pointer with the given address. /// /// This performs the same operation as a cast, but copies the *address-space* and @@ -116,6 +129,14 @@ where } #[inline] + fn without_provenance(addr: Self::Usize) -> Self { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any + // provenance). + unsafe { core::mem::transmute_copy(&addr) } + } + + #[inline] fn with_addr(self, addr: Self::Usize) -> Self { // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. // diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs index d62642fb906..42425ef37e5 100644 --- a/library/portable-simd/crates/core_simd/src/swizzle.rs +++ b/library/portable-simd/crates/core_simd/src/swizzle.rs @@ -155,8 +155,7 @@ pub trait Swizzle<const N: usize> { /// Creates a new mask from the elements of `mask`. /// - /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of - /// `first` and `second`. + /// Element `i` of the output is `mask[Self::INDEX[i]]`. #[inline] #[must_use = "method returns a new mask and does not mutate the original inputs"] fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N> @@ -260,6 +259,50 @@ where Rotate::<OFFSET>::swizzle(self) } + /// Shifts the vector elements to the left by `OFFSET`, filling in with + /// `padding` from the right. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn shift_elements_left<const OFFSET: usize>(self, padding: T) -> Self { + struct Shift<const OFFSET: usize>; + + impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> { + const INDEX: [usize; N] = const { + let mut index = [N; N]; + let mut i = 0; + while i + OFFSET < N { + index[i] = i + OFFSET; + i += 1; + } + index + }; + } + + Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding)) + } + + /// Shifts the vector elements to the right by `OFFSET`, filling in with + /// `padding` from the left. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn shift_elements_right<const OFFSET: usize>(self, padding: T) -> Self { + struct Shift<const OFFSET: usize>; + + impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> { + const INDEX: [usize; N] = const { + let mut index = [N; N]; + let mut i = OFFSET; + while i < N { + index[i] = i - OFFSET; + i += 1; + } + index + }; + } + + Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding)) + } + /// Interleave two vectors. /// /// The resulting vectors contain elements taken alternatively from `self` and `other`, first @@ -320,7 +363,9 @@ where /// /// ``` /// # #![feature(portable_simd)] - /// # use core::simd::Simd; + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::Simd; /// let a = Simd::from_array([0, 4, 1, 5]); /// let b = Simd::from_array([2, 6, 3, 7]); /// let (x, y) = a.deinterleave(b); @@ -391,4 +436,210 @@ where } Resize::<N>::concat_swizzle(self, Simd::splat(value)) } + + /// Extract a vector from another vector. + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::u32x4; + /// let x = u32x4::from_array([0, 1, 2, 3]); + /// assert_eq!(x.extract::<1, 2>().to_array(), [1, 2]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN> + where + LaneCount<LEN>: SupportedLaneCount, + { + struct Extract<const N: usize, const START: usize>; + impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> { + const INDEX: [usize; LEN] = const { + assert!(START + LEN <= N, "index out of bounds"); + let mut index = [0; LEN]; + let mut i = 0; + while i < LEN { + index[i] = START + i; + i += 1; + } + index + }; + } + Extract::<N, START>::swizzle(self) + } +} + +impl<T, const N: usize> Mask<T, N> +where + T: MaskElement, + LaneCount<N>: SupportedLaneCount, +{ + /// Reverse the order of the elements in the mask. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn reverse(self) -> Self { + // Safety: swizzles are safe for masks + unsafe { Self::from_int_unchecked(self.to_int().reverse()) } + } + + /// Rotates the mask such that the first `OFFSET` elements of the slice move to the end + /// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`, + /// the element previously at index `OFFSET` will become the first element in the slice. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self { + // Safety: swizzles are safe for masks + unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) } + } + + /// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to + /// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`, + /// the element previously at index `self.len() - OFFSET` will become the first element in the slice. + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self { + // Safety: swizzles are safe for masks + unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) } + } + + /// Shifts the mask elements to the left by `OFFSET`, filling in with + /// `padding` from the right. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original inputs"] + pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self { + // Safety: swizzles are safe for masks + unsafe { + Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding { + T::TRUE + } else { + T::FALSE + })) + } + } + + /// Shifts the mask elements to the right by `OFFSET`, filling in with + /// `padding` from the left. + #[inline] + #[must_use = "method returns a new mask and does not mutate the original inputs"] + pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self { + // Safety: swizzles are safe for masks + unsafe { + Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding { + T::TRUE + } else { + T::FALSE + })) + } + } + + /// Interleave two masks. + /// + /// The resulting masks contain elements taken alternatively from `self` and `other`, first + /// filling the first result, and then the second. + /// + /// The reverse of this operation is [`Mask::deinterleave`]. + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x4; + /// let a = mask32x4::from_array([false, true, false, true]); + /// let b = mask32x4::from_array([false, false, true, true]); + /// let (x, y) = a.interleave(b); + /// assert_eq!(x.to_array(), [false, false, true, false]); + /// assert_eq!(y.to_array(), [false, true, true, true]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn interleave(self, other: Self) -> (Self, Self) { + let (lo, hi) = self.to_int().interleave(other.to_int()); + // Safety: swizzles are safe for masks + unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) } + } + + /// Deinterleave two masks. + /// + /// The first result takes every other element of `self` and then `other`, starting with + /// the first element. + /// + /// The second result takes every other element of `self` and then `other`, starting with + /// the second element. + /// + /// The reverse of this operation is [`Mask::interleave`]. + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x4; + /// let a = mask32x4::from_array([false, true, false, true]); + /// let b = mask32x4::from_array([false, false, true, true]); + /// let (x, y) = a.deinterleave(b); + /// assert_eq!(x.to_array(), [false, false, false, true]); + /// assert_eq!(y.to_array(), [true, true, false, true]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn deinterleave(self, other: Self) -> (Self, Self) { + let (even, odd) = self.to_int().deinterleave(other.to_int()); + // Safety: swizzles are safe for masks + unsafe { + ( + Self::from_int_unchecked(even), + Self::from_int_unchecked(odd), + ) + } + } + + /// Resize a mask. + /// + /// If `M` > `N`, extends the length of a mask, setting the new elements to `value`. + /// If `M` < `N`, truncates the mask to the first `M` elements. + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x4; + /// let x = mask32x4::from_array([false, true, true, false]); + /// assert_eq!(x.resize::<8>(true).to_array(), [false, true, true, false, true, true, true, true]); + /// assert_eq!(x.resize::<2>(true).to_array(), [false, true]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M> + where + LaneCount<M>: SupportedLaneCount, + { + // Safety: swizzles are safe for masks + unsafe { + Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value { + T::TRUE + } else { + T::FALSE + })) + } + } + + /// Extract a vector from another vector. + /// + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::mask32x4; + /// let x = mask32x4::from_array([false, true, true, false]); + /// assert_eq!(x.extract::<1, 2>().to_array(), [true, true]); + /// ``` + #[inline] + #[must_use = "method returns a new vector and does not mutate the original inputs"] + pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN> + where + LaneCount<LEN>: SupportedLaneCount, + { + // Safety: swizzles are safe for masks + unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) } + } } diff --git a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs index 3b6388d0f27..773bd028bae 100644 --- a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs +++ b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs @@ -59,15 +59,40 @@ where target_endian = "little" ))] 16 => transize(vqtbl1q_u8, self, idxs), + #[cfg(all( + target_arch = "arm", + target_feature = "v7", + target_feature = "neon", + target_endian = "little" + ))] + 16 => transize(armv7_neon_swizzle_u8x16, self, idxs), #[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))] 32 => transize(avx2_pshufb, self, idxs), #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] - 32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self), - // Notable absence: avx512bw shuffle - // If avx512bw is available, odds of avx512vbmi are good - // FIXME: initial AVX512VBMI variant didn't actually pass muster - // #[cfg(target_feature = "avx512vbmi")] - // 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs), + 32 => { + // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit + let swizzler = |bytes, idxs| { + let mask = x86::_mm256_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>( + idxs, + Simd::<u8, 32>::splat(N as u8).into(), + ); + x86::_mm256_maskz_permutexvar_epi8(mask, idxs, bytes) + }; + transize(swizzler, self, idxs) + } + // Notable absence: avx512bw pshufb shuffle + #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))] + 64 => { + // Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit + let swizzler = |bytes, idxs| { + let mask = x86::_mm512_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>( + idxs, + Simd::<u8, 64>::splat(N as u8).into(), + ); + x86::_mm512_maskz_permutexvar_epi8(mask, idxs, bytes) + }; + transize(swizzler, self, idxs) + } _ => { let mut array = [0; N]; for (i, k) in idxs.to_array().into_iter().enumerate() { @@ -82,6 +107,28 @@ where } } +/// armv7 neon supports swizzling `u8x16` by swizzling two u8x8 blocks +/// with a u8x8x2 lookup table. +/// +/// # Safety +/// This requires armv7 neon to work +#[cfg(all( + target_arch = "arm", + target_feature = "v7", + target_feature = "neon", + target_endian = "little" +))] +unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> { + use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8}; + // SAFETY: Caller promised arm neon support + unsafe { + let bytes = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into())); + let lo = vtbl2_u8(bytes, vget_low_u8(idxs.into())); + let hi = vtbl2_u8(bytes, vget_high_u8(idxs.into())); + vcombine_u8(lo, hi).into() + } +} + /// "vpshufb like it was meant to be" on AVX2 /// /// # Safety diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs index 3e239169149..9c4dd36c24f 100644 --- a/library/portable-simd/crates/core_simd/src/vector.rs +++ b/library/portable-simd/crates/core_simd/src/vector.rs @@ -99,7 +99,7 @@ use crate::simd::{ // directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also // causes rustc to emit illegal LLVM IR in some cases. -#[repr(simd)] +#[repr(simd, packed)] pub struct Simd<T, const N: usize>([T; N]) where LaneCount<N>: SupportedLaneCount, @@ -144,14 +144,32 @@ where /// assert_eq!(v.as_array(), &[8, 8, 8, 8]); /// ``` #[inline] - pub fn splat(value: T) -> Self { - // This is preferred over `[value; N]`, since it's explicitly a splat: - // https://github.com/rust-lang/rust/issues/97804 - struct Splat; - impl<const N: usize> Swizzle<N> for Splat { - const INDEX: [usize; N] = [0; N]; + #[rustc_const_unstable(feature = "portable_simd", issue = "86656")] + pub const fn splat(value: T) -> Self { + const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N> + where + T: SimdElement, + LaneCount<N>: SupportedLaneCount, + { + Simd::from_array([value; N]) } - Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value])) + + fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N> + where + T: SimdElement, + LaneCount<N>: SupportedLaneCount, + { + // This is preferred over `[value; N]`, since it's explicitly a splat: + // https://github.com/rust-lang/rust/issues/97804 + struct Splat; + impl<const N: usize> Swizzle<N> for Splat { + const INDEX: [usize; N] = [0; N]; + } + + Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value])) + } + + core::intrinsics::const_eval_select((value,), splat_const, splat_rt) } /// Returns an array reference containing the entire SIMD vector. @@ -425,6 +443,9 @@ where /// /// When the element is disabled, that memory location is not accessed and the corresponding /// value from `or` is passed through. + /// + /// # Safety + /// Enabled loads must not exceed the length of `slice`. #[must_use] #[inline] pub unsafe fn load_select_unchecked( @@ -442,6 +463,9 @@ where /// /// When the element is disabled, that memory location is not accessed and the corresponding /// value from `or` is passed through. + /// + /// # Safety + /// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`. #[must_use] #[inline] pub unsafe fn load_select_ptr( @@ -924,6 +948,7 @@ where } } +/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl<T, const N: usize> PartialOrd for Simd<T, N> where LaneCount<N>: SupportedLaneCount, @@ -943,6 +968,7 @@ where { } +/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl<T, const N: usize> Ord for Simd<T, N> where LaneCount<N>: SupportedLaneCount, @@ -1195,6 +1221,7 @@ fn lane_indices<const N: usize>() -> Simd<usize, N> where LaneCount<N>: SupportedLaneCount, { + #![allow(clippy::needless_range_loop)] let mut index = [0; N]; for i in 0..N { index[i] = i; diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs index 1a34a3a8de5..57536e4fc77 100644 --- a/library/portable-simd/crates/core_simd/src/vendor.rs +++ b/library/portable-simd/crates/core_simd/src/vendor.rs @@ -29,3 +29,6 @@ mod arm; #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] mod powerpc; + +#[cfg(target_arch = "loongarch64")] +mod loongarch64; diff --git a/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs b/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs new file mode 100644 index 00000000000..1290bc166b2 --- /dev/null +++ b/library/portable-simd/crates/core_simd/src/vendor/loongarch64.rs @@ -0,0 +1,31 @@ +use crate::simd::*; +use core::arch::loongarch64::*; + +from_transmute! { unsafe u8x16 => v16u8 } +from_transmute! { unsafe u8x32 => v32u8 } +from_transmute! { unsafe i8x16 => v16i8 } +from_transmute! { unsafe i8x32 => v32i8 } + +from_transmute! { unsafe u16x8 => v8u16 } +from_transmute! { unsafe u16x16 => v16u16 } +from_transmute! { unsafe i16x8 => v8i16 } +from_transmute! { unsafe i16x16 => v16i16 } + +from_transmute! { unsafe u32x4 => v4u32 } +from_transmute! { unsafe u32x8 => v8u32 } +from_transmute! { unsafe i32x4 => v4i32 } +from_transmute! { unsafe i32x8 => v8i32 } +from_transmute! { unsafe f32x4 => v4f32 } +from_transmute! { unsafe f32x8 => v8f32 } + +from_transmute! { unsafe u64x2 => v2u64 } +from_transmute! { unsafe u64x4 => v4u64 } +from_transmute! { unsafe i64x2 => v2i64 } +from_transmute! { unsafe i64x4 => v4i64 } +from_transmute! { unsafe f64x2 => v2f64 } +from_transmute! { unsafe f64x4 => v4f64 } + +from_transmute! { unsafe usizex2 => v2u64 } +from_transmute! { unsafe usizex4 => v4u64 } +from_transmute! { unsafe isizex2 => v2i64 } +from_transmute! { unsafe isizex4 => v4i64 } diff --git a/library/portable-simd/crates/core_simd/tests/layout.rs b/library/portable-simd/crates/core_simd/tests/layout.rs new file mode 100644 index 00000000000..24114c2d261 --- /dev/null +++ b/library/portable-simd/crates/core_simd/tests/layout.rs @@ -0,0 +1,35 @@ +#![feature(portable_simd)] + +macro_rules! layout_tests { + { $($mod:ident, $ty:ty,)* } => { + $( + mod $mod { + test_helpers::test_lanes! { + fn no_padding<const LANES: usize>() { + assert_eq!( + core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(), + core::mem::size_of::<[$ty; LANES]>(), + ); + } + } + } + )* + } +} + +layout_tests! { + i8, i8, + i16, i16, + i32, i32, + i64, i64, + isize, isize, + u8, u8, + u16, u16, + u32, u32, + u64, u64, + usize, usize, + f32, f32, + f64, f64, + mut_ptr, *mut (), + const_ptr, *const (), +} diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs index fc6a3476b7c..48786d02440 100644 --- a/library/portable-simd/crates/core_simd/tests/masks.rs +++ b/library/portable-simd/crates/core_simd/tests/masks.rs @@ -99,7 +99,6 @@ macro_rules! test_mask_api { assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask); } - #[cfg(feature = "all_lane_counts")] #[test] fn roundtrip_bitmask_conversion_odd() { let values = [ @@ -134,48 +133,6 @@ macro_rules! test_mask_api { cast_impl::<i64>(); cast_impl::<isize>(); } - - #[test] - fn roundtrip_bitmask_vector_conversion() { - use core_simd::simd::ToBytes; - let values = [ - true, false, false, true, false, false, true, false, - true, true, false, false, false, false, false, true, - ]; - let mask = Mask::<$type, 16>::from_array(values); - let bitmask = mask.to_bitmask_vector(); - assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]); - assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask); - } - - // rust-lang/portable-simd#379 - #[test] - fn roundtrip_bitmask_vector_conversion_small() { - use core_simd::simd::ToBytes; - let values = [ - true, false, true, true - ]; - let mask = Mask::<$type, 4>::from_array(values); - let bitmask = mask.to_bitmask_vector(); - assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101); - assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask); - } - - /* FIXME doesn't work with non-powers-of-two, yet - // rust-lang/portable-simd#379 - #[cfg(feature = "all_lane_counts")] - #[test] - fn roundtrip_bitmask_vector_conversion_odd() { - use core_simd::simd::ToBytes; - let values = [ - true, false, true, false, true, true, false, false, false, true, true, - ]; - let mask = Mask::<$type, 11>::from_array(values); - let bitmask = mask.to_bitmask_vector(); - assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]); - assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask); - } - */ } } } diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs index aa565a13752..6de78f51e59 100644 --- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs +++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs @@ -216,6 +216,22 @@ macro_rules! impl_common_integer_tests { ) } + fn count_ones<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &$vector::<LANES>::count_ones, + &|x| x.count_ones() as _, + &|_| true, + ) + } + + fn count_zeros<const LANES: usize>() { + test_helpers::test_unary_elementwise( + &$vector::<LANES>::count_zeros, + &|x| x.count_zeros() as _, + &|_| true, + ) + } + fn leading_zeros<const LANES: usize>() { test_helpers::test_unary_elementwise( &$vector::<LANES>::leading_zeros, @@ -307,6 +323,14 @@ macro_rules! impl_signed_tests { assert_eq!(a % b, Vector::<LANES>::splat(0)); } + fn abs_diff<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &Vector::<LANES>::abs_diff, + &Scalar::abs_diff, + &|_, _| true, + ) + } + fn simd_min<const LANES: usize>() { use core_simd::simd::cmp::SimdOrd; let a = Vector::<LANES>::splat(Scalar::MIN); @@ -419,6 +443,14 @@ macro_rules! impl_unsigned_tests { &|_| true, ); } + + fn abs_diff<const LANES: usize>() { + test_helpers::test_binary_elementwise( + &Vector::<LANES>::abs_diff, + &Scalar::abs_diff, + &|_, _| true, + ) + } } impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add); @@ -495,6 +527,9 @@ macro_rules! impl_float_tests { } fn is_normal<const LANES: usize>() { + // Arm v7 Neon violates float opsem re: subnormals, see + // https://github.com/rust-lang/portable-simd/issues/439 + #[cfg(not(target_arch = "arm"))] test_helpers::test_unary_mask_elementwise( &Vector::<LANES>::is_normal, &Scalar::is_normal, @@ -503,6 +538,9 @@ macro_rules! impl_float_tests { } fn is_subnormal<const LANES: usize>() { + // Arm v7 Neon violates float opsem re: subnormals, see + // https://github.com/rust-lang/portable-simd/issues/439 + #[cfg(not(target_arch = "arm"))] test_helpers::test_unary_mask_elementwise( &Vector::<LANES>::is_subnormal, &Scalar::is_subnormal, diff --git a/library/portable-simd/crates/core_simd/tests/swizzle.rs b/library/portable-simd/crates/core_simd/tests/swizzle.rs index 522d71439b7..7001e5f6bf8 100644 --- a/library/portable-simd/crates/core_simd/tests/swizzle.rs +++ b/library/portable-simd/crates/core_simd/tests/swizzle.rs @@ -50,6 +50,24 @@ fn rotate() { #[test] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn shift() { + let a = Simd::from_array([1, 2, 3, 4]); + assert_eq!(a.shift_elements_left::<0>(0).to_array(), [1, 2, 3, 4]); + assert_eq!(a.shift_elements_left::<1>(0).to_array(), [2, 3, 4, 0]); + assert_eq!(a.shift_elements_left::<2>(9).to_array(), [3, 4, 9, 9]); + assert_eq!(a.shift_elements_left::<3>(8).to_array(), [4, 8, 8, 8]); + assert_eq!(a.shift_elements_left::<4>(7).to_array(), [7, 7, 7, 7]); + assert_eq!(a.shift_elements_left::<5>(6).to_array(), [6, 6, 6, 6]); + assert_eq!(a.shift_elements_right::<0>(0).to_array(), [1, 2, 3, 4]); + assert_eq!(a.shift_elements_right::<1>(0).to_array(), [0, 1, 2, 3]); + assert_eq!(a.shift_elements_right::<2>(-1).to_array(), [-1, -1, 1, 2]); + assert_eq!(a.shift_elements_right::<3>(-2).to_array(), [-2, -2, -2, 1]); + assert_eq!(a.shift_elements_right::<4>(-3).to_array(), [-3, -3, -3, -3]); + assert_eq!(a.shift_elements_right::<5>(-4).to_array(), [-4, -4, -4, -4]); +} + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn interleave() { let a = Simd::from_array([0, 1, 2, 3, 4, 5, 6, 7]); let b = Simd::from_array([8, 9, 10, 11, 12, 13, 14, 15]); diff --git a/library/portable-simd/crates/test_helpers/Cargo.toml b/library/portable-simd/crates/test_helpers/Cargo.toml index 23dae7c9338..a5359b9abc8 100644 --- a/library/portable-simd/crates/test_helpers/Cargo.toml +++ b/library/portable-simd/crates/test_helpers/Cargo.toml @@ -6,6 +6,3 @@ publish = false [dependencies] proptest = { version = "0.10", default-features = false, features = ["alloc"] } - -[features] -all_lane_counts = [] diff --git a/library/portable-simd/crates/test_helpers/src/lib.rs b/library/portable-simd/crates/test_helpers/src/lib.rs index 51b860a8635..197c920e11e 100644 --- a/library/portable-simd/crates/test_helpers/src/lib.rs +++ b/library/portable-simd/crates/test_helpers/src/lib.rs @@ -539,32 +539,22 @@ macro_rules! test_lanes { #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; lanes_1 1; lanes_2 2; - lanes_4 4; - ); - - #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - $crate::test_lanes_helper!( - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; - lanes_8 8; - lanes_16 16; - lanes_32 32; - lanes_64 64; - ); - - #[cfg(feature = "all_lane_counts")] - $crate::test_lanes_helper!( - // test some odd and even non-power-of-2 lengths on miri - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + // Cover an odd and an even non-power-of-2 length in Miri. + // (Even non-power-of-2 vectors have alignment between element + // and vector size, so we want to cover that case as well.) lanes_3 3; - lanes_5 5; + lanes_6 6; ); - #[cfg(feature = "all_lane_counts")] #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow $crate::test_lanes_helper!( #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]; + lanes_4 4; + lanes_5 5; + lanes_7 7; + lanes_8 8; lanes_9 9; lanes_10 10; lanes_11 11; @@ -572,52 +562,55 @@ macro_rules! test_lanes { lanes_13 13; lanes_14 14; lanes_15 15; + lanes_16 16; lanes_17 17; - lanes_18 18; - lanes_19 19; - lanes_20 20; - lanes_21 21; - lanes_22 22; - lanes_23 23; + //lanes_18 18; + //lanes_19 19; + //lanes_20 20; + //lanes_21 21; + //lanes_22 22; + //lanes_23 23; lanes_24 24; - lanes_25 25; - lanes_26 26; - lanes_27 27; - lanes_28 28; - lanes_29 29; - lanes_30 30; - lanes_31 31; - lanes_33 33; - lanes_34 34; - lanes_35 35; - lanes_36 36; - lanes_37 37; - lanes_38 38; - lanes_39 39; - lanes_40 40; - lanes_41 41; - lanes_42 42; - lanes_43 43; - lanes_44 44; - lanes_45 45; - lanes_46 46; + //lanes_25 25; + //lanes_26 26; + //lanes_27 27; + //lanes_28 28; + //lanes_29 29; + //lanes_30 30; + //lanes_31 31; + lanes_32 32; + //lanes_33 33; + //lanes_34 34; + //lanes_35 35; + //lanes_36 36; + //lanes_37 37; + //lanes_38 38; + //lanes_39 39; + //lanes_40 40; + //lanes_41 41; + //lanes_42 42; + //lanes_43 43; + //lanes_44 44; + //lanes_45 45; + //lanes_46 46; lanes_47 47; - lanes_48 48; - lanes_49 49; - lanes_50 50; - lanes_51 51; - lanes_52 52; - lanes_53 53; - lanes_54 54; - lanes_55 55; + //lanes_48 48; + //lanes_49 49; + //lanes_50 50; + //lanes_51 51; + //lanes_52 52; + //lanes_53 53; + //lanes_54 54; + //lanes_55 55; lanes_56 56; lanes_57 57; - lanes_58 58; - lanes_59 59; - lanes_60 60; - lanes_61 61; - lanes_62 62; + //lanes_58 58; + //lanes_59 59; + //lanes_60 60; + //lanes_61 61; + //lanes_62 62; lanes_63 63; + lanes_64 64; ); } )* @@ -639,36 +632,24 @@ macro_rules! test_lanes_panic { core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body + // test some odd and even non-power-of-2 lengths on miri $crate::test_lanes_helper!( #[should_panic]; lanes_1 1; lanes_2 2; - lanes_4 4; - ); - - #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow - $crate::test_lanes_helper!( - #[should_panic]; - lanes_8 8; - lanes_16 16; - lanes_32 32; - lanes_64 64; - ); - - #[cfg(feature = "all_lane_counts")] - $crate::test_lanes_helper!( - // test some odd and even non-power-of-2 lengths on miri - #[should_panic]; lanes_3 3; - lanes_5 5; + lanes_6 6; ); - #[cfg(feature = "all_lane_counts")] #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow $crate::test_lanes_helper!( #[should_panic]; + lanes_4 4; + lanes_5 5; + lanes_7 7; + lanes_8 8; lanes_9 9; lanes_10 10; lanes_11 11; @@ -676,52 +657,55 @@ macro_rules! test_lanes_panic { lanes_13 13; lanes_14 14; lanes_15 15; + lanes_16 16; lanes_17 17; - lanes_18 18; - lanes_19 19; - lanes_20 20; - lanes_21 21; - lanes_22 22; - lanes_23 23; + //lanes_18 18; + //lanes_19 19; + //lanes_20 20; + //lanes_21 21; + //lanes_22 22; + //lanes_23 23; lanes_24 24; - lanes_25 25; - lanes_26 26; - lanes_27 27; - lanes_28 28; - lanes_29 29; - lanes_30 30; - lanes_31 31; - lanes_33 33; - lanes_34 34; - lanes_35 35; - lanes_36 36; - lanes_37 37; - lanes_38 38; - lanes_39 39; - lanes_40 40; - lanes_41 41; - lanes_42 42; - lanes_43 43; - lanes_44 44; - lanes_45 45; - lanes_46 46; + //lanes_25 25; + //lanes_26 26; + //lanes_27 27; + //lanes_28 28; + //lanes_29 29; + //lanes_30 30; + //lanes_31 31; + lanes_32 32; + //lanes_33 33; + //lanes_34 34; + //lanes_35 35; + //lanes_36 36; + //lanes_37 37; + //lanes_38 38; + //lanes_39 39; + //lanes_40 40; + //lanes_41 41; + //lanes_42 42; + //lanes_43 43; + //lanes_44 44; + //lanes_45 45; + //lanes_46 46; lanes_47 47; - lanes_48 48; - lanes_49 49; - lanes_50 50; - lanes_51 51; - lanes_52 52; - lanes_53 53; - lanes_54 54; - lanes_55 55; + //lanes_48 48; + //lanes_49 49; + //lanes_50 50; + //lanes_51 51; + //lanes_52 52; + //lanes_53 53; + //lanes_54 54; + //lanes_55 55; lanes_56 56; lanes_57 57; - lanes_58 58; - lanes_59 59; - lanes_60 60; - lanes_61 61; - lanes_62 62; + //lanes_58 58; + //lanes_59 59; + //lanes_60 60; + //lanes_61 61; + //lanes_62 62; lanes_63 63; + lanes_64 64; ); } )* diff --git a/library/portable-simd/rust-toolchain.toml b/library/portable-simd/rust-toolchain.toml new file mode 100644 index 00000000000..d17c6d2e889 --- /dev/null +++ b/library/portable-simd/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2025-01-16" +components = ["rustfmt", "clippy", "miri", "rust-src"] diff --git a/library/portable-simd/subtree-sync.sh b/library/portable-simd/subtree-sync.sh new file mode 100755 index 00000000000..18360077623 --- /dev/null +++ b/library/portable-simd/subtree-sync.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +set -eou pipefail + +git fetch origin +pushd $2 +git fetch origin +popd + +if [ "$(git rev-parse --show-prefix)" != "" ]; then + echo "Run this script from the git root" >&2 + exit 1 +fi + +if [ "$(git rev-parse HEAD)" != "$(git rev-parse origin/master)" ]; then + echo "$(pwd) is not at origin/master" >&2 + exit 1 +fi + +if [ ! -f library/portable-simd/git-subtree.sh ]; then + curl -sS https://raw.githubusercontent.com/bjorn3/git/tqc-subtree-portable/contrib/subtree/git-subtree.sh -o library/portable-simd/git-subtree.sh + chmod +x library/portable-simd/git-subtree.sh +fi + +today=$(date +%Y-%m-%d) + +case $1 in + "push") + upstream=rust-upstream-$today + merge=sync-from-rust-$today + + pushd $2 + git checkout master + git pull + popd + + library/portable-simd/git-subtree.sh push -P library/portable-simd $2 $upstream + + pushd $2 + git checkout -B $merge origin/master + git merge $upstream + popd + echo "Branch \`$merge\` created in \`$2\`. You may need to resolve merge conflicts." + ;; + "pull") + branch=sync-from-portable-simd-$today + + git checkout -B $branch + echo "Creating branch \`$branch\`... You may need to resolve merge conflicts." + library/portable-simd/git-subtree.sh pull -P library/portable-simd $2 origin/master + ;; +esac diff --git a/library/std/Cargo.toml b/library/std/Cargo.toml index aa391a4b317..add5a20d179 100644 --- a/library/std/Cargo.toml +++ b/library/std/Cargo.toml @@ -18,7 +18,7 @@ cfg-if = { version = "1.0", features = ['rustc-dep-of-std'] } panic_unwind = { path = "../panic_unwind", optional = true } panic_abort = { path = "../panic_abort" } core = { path = "../core", public = true } -compiler_builtins = { version = "=0.1.145" } +compiler_builtins = { version = "=0.1.146" } unwind = { path = "../unwind" } hashbrown = { version = "0.15", default-features = false, features = [ 'rustc-dep-of-std', diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs index cd3558ac6a4..f6f9067b9c6 100644 --- a/src/bootstrap/src/core/build_steps/compile.rs +++ b/src/bootstrap/src/core/build_steps/compile.rs @@ -443,8 +443,49 @@ fn compiler_rt_for_profiler(builder: &Builder<'_>) -> PathBuf { /// Configure cargo to compile the standard library, adding appropriate env vars /// and such. pub fn std_cargo(builder: &Builder<'_>, target: TargetSelection, stage: u32, cargo: &mut Cargo) { - if let Some(target) = env::var_os("MACOSX_STD_DEPLOYMENT_TARGET") { - cargo.env("MACOSX_DEPLOYMENT_TARGET", target); + // rustc already ensures that it builds with the minimum deployment + // target, so ideally we shouldn't need to do anything here. + // + // However, `cc` currently defaults to a higher version for backwards + // compatibility, which means that compiler-rt, which is built via + // compiler-builtins' build script, gets built with a higher deployment + // target. This in turn causes warnings while linking, and is generally + // a compatibility hazard. + // + // So, at least until https://github.com/rust-lang/cc-rs/issues/1171, or + // perhaps https://github.com/rust-lang/cargo/issues/13115 is resolved, we + // explicitly set the deployment target environment variables to avoid + // this issue. + // + // This place also serves as an extension point if we ever wanted to raise + // rustc's default deployment target while keeping the prebuilt `std` at + // a lower version, so it's kinda nice to have in any case. + if target.contains("apple") && !builder.config.dry_run() { + // Query rustc for the deployment target, and the associated env var. + // The env var is one of the standard `*_DEPLOYMENT_TARGET` vars, i.e. + // `MACOSX_DEPLOYMENT_TARGET`, `IPHONEOS_DEPLOYMENT_TARGET`, etc. + let mut cmd = command(builder.rustc(cargo.compiler())); + cmd.arg("--target").arg(target.rustc_target_arg()); + cmd.arg("--print=deployment-target"); + let output = cmd.run_capture_stdout(builder).stdout(); + + let (env_var, value) = output.split_once('=').unwrap(); + // Unconditionally set the env var (if it was set in the environment + // already, rustc should've picked that up). + cargo.env(env_var.trim(), value.trim()); + + // Allow CI to override the deployment target for `std` on macOS. + // + // This is useful because we might want the host tooling LLVM, `rustc` + // and Cargo to have a different deployment target than `std` itself + // (currently, these two versions are the same, but in the past, we + // supported macOS 10.7 for user code and macOS 10.8 in host tooling). + // + // It is not necessary on the other platforms, since only macOS has + // support for host tooling. + if let Some(target) = env::var_os("MACOSX_STD_DEPLOYMENT_TARGET") { + cargo.env("MACOSX_DEPLOYMENT_TARGET", target); + } } // Paths needed by `library/profiler_builtins/build.rs`. @@ -1049,12 +1090,12 @@ pub fn rustc_cargo( // <https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/Internal.20lint.20for.20raw.20.60print!.60.20and.20.60println!.60.3F>. cargo.rustflag("-Zon-broken-pipe=kill"); - // We temporarily disable linking here as part of some refactoring. - // This way, people can manually use -Z llvm-plugins and -C passes=enzyme for now. - // In a follow-up PR, we will re-enable linking here and load the pass for them. - //if builder.config.llvm_enzyme { - // cargo.rustflag("-l").rustflag("Enzyme-19"); - //} + // We want to link against registerEnzyme and in the future we want to use additional + // functionality from Enzyme core. For that we need to link against Enzyme. + // FIXME(ZuseZ4): Get the LLVM version number automatically instead of hardcoding it. + if builder.config.llvm_enzyme { + cargo.rustflag("-l").rustflag("Enzyme-19"); + } // Building with protected visibility reduces the number of dynamic relocations needed, giving // us a faster startup time. However GNU ld < 2.40 will error if we try to link a shared object @@ -1234,6 +1275,9 @@ fn rustc_llvm_env(builder: &Builder<'_>, cargo: &mut Cargo, target: TargetSelect if builder.is_rust_llvm(target) { cargo.env("LLVM_RUSTLLVM", "1"); } + if builder.config.llvm_enzyme { + cargo.env("LLVM_ENZYME", "1"); + } let llvm::LlvmResult { llvm_config, .. } = builder.ensure(llvm::Llvm { target }); cargo.env("LLVM_CONFIG", &llvm_config); @@ -1544,7 +1588,8 @@ pub fn compiler_file( return PathBuf::new(); } let mut cmd = command(compiler); - cmd.args(builder.cflags(target, GitRepo::Rustc, c)); + cmd.args(builder.cc_handled_clags(target, c)); + cmd.args(builder.cc_unhandled_cflags(target, GitRepo::Rustc, c)); cmd.arg(format!("-print-file-name={file}")); let out = cmd.run_capture_stdout(builder).stdout(); PathBuf::from(out.trim()) diff --git a/src/bootstrap/src/core/build_steps/llvm.rs b/src/bootstrap/src/core/build_steps/llvm.rs index 49bf04356d5..9dd35057062 100644 --- a/src/bootstrap/src/core/build_steps/llvm.rs +++ b/src/bootstrap/src/core/build_steps/llvm.rs @@ -54,6 +54,14 @@ impl LlvmBuildStatus { LlvmBuildStatus::ShouldBuild(_) => true, } } + + #[cfg(test)] + pub fn llvm_result(&self) -> &LlvmResult { + match self { + LlvmBuildStatus::AlreadyBuilt(res) => res, + LlvmBuildStatus::ShouldBuild(meta) => &meta.res, + } + } } /// Linker flags to pass to LLVM's CMake invocation. @@ -120,9 +128,19 @@ pub fn prebuilt_llvm_config( let root = "src/llvm-project/llvm"; let out_dir = builder.llvm_out(target); - let mut llvm_config_ret_dir = builder.llvm_out(builder.config.build); - llvm_config_ret_dir.push("bin"); - let build_llvm_config = llvm_config_ret_dir.join(exe("llvm-config", builder.config.build)); + let build_llvm_config = if let Some(build_llvm_config) = builder + .config + .target_config + .get(&builder.config.build) + .and_then(|config| config.llvm_config.clone()) + { + build_llvm_config + } else { + let mut llvm_config_ret_dir = builder.llvm_out(builder.config.build); + llvm_config_ret_dir.push("bin"); + llvm_config_ret_dir.join(exe("llvm-config", builder.config.build)) + }; + let llvm_cmake_dir = out_dir.join("lib/cmake/llvm"); let res = LlvmResult { llvm_config: build_llvm_config, llvm_cmake_dir }; @@ -761,9 +779,15 @@ fn configure_cmake( } cfg.build_arg("-j").build_arg(builder.jobs().to_string()); + // FIXME(madsmtm): Allow `cmake-rs` to select flags by itself by passing + // our flags via `.cflag`/`.cxxflag` instead. + // + // Needs `suppressed_compiler_flag_prefixes` to be gone, and hence + // https://github.com/llvm/llvm-project/issues/88780 to be fixed. let mut cflags: OsString = builder - .cflags(target, GitRepo::Llvm, CLang::C) + .cc_handled_clags(target, CLang::C) .into_iter() + .chain(builder.cc_unhandled_cflags(target, GitRepo::Llvm, CLang::C)) .filter(|flag| { !suppressed_compiler_flag_prefixes .iter() @@ -782,8 +806,9 @@ fn configure_cmake( } cfg.define("CMAKE_C_FLAGS", cflags); let mut cxxflags: OsString = builder - .cflags(target, GitRepo::Llvm, CLang::Cxx) + .cc_handled_clags(target, CLang::Cxx) .into_iter() + .chain(builder.cc_unhandled_cflags(target, GitRepo::Llvm, CLang::Cxx)) .filter(|flag| { !suppressed_compiler_flag_prefixes .iter() @@ -968,6 +993,7 @@ impl Step for Enzyme { .env("LLVM_CONFIG_REAL", &llvm_config) .define("LLVM_ENABLE_ASSERTIONS", "ON") .define("ENZYME_EXTERNAL_SHARED_LIB", "ON") + .define("ENZYME_RUNPASS", "ON") .define("LLVM_DIR", builder.llvm_out(target)); cfg.build(); diff --git a/src/bootstrap/src/core/build_steps/test.rs b/src/bootstrap/src/core/build_steps/test.rs index b0181410be1..c452f4cd6ae 100644 --- a/src/bootstrap/src/core/build_steps/test.rs +++ b/src/bootstrap/src/core/build_steps/test.rs @@ -2011,14 +2011,18 @@ NOTE: if you're sure you want to do this, please open an issue as to why. In the // Only pass correct values for these flags for the `run-make` suite as it // requires that a C++ compiler was configured which isn't always the case. if !builder.config.dry_run() && mode == "run-make" { + let mut cflags = builder.cc_handled_clags(target, CLang::C); + cflags.extend(builder.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::C)); + let mut cxxflags = builder.cc_handled_clags(target, CLang::Cxx); + cxxflags.extend(builder.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::Cxx)); cmd.arg("--cc") .arg(builder.cc(target)) .arg("--cxx") .arg(builder.cxx(target).unwrap()) .arg("--cflags") - .arg(builder.cflags(target, GitRepo::Rustc, CLang::C).join(" ")) + .arg(cflags.join(" ")) .arg("--cxxflags") - .arg(builder.cflags(target, GitRepo::Rustc, CLang::Cxx).join(" ")); + .arg(cxxflags.join(" ")); copts_passed = true; if let Some(ar) = builder.ar(target) { cmd.arg("--ar").arg(ar); diff --git a/src/bootstrap/src/core/builder/cargo.rs b/src/bootstrap/src/core/builder/cargo.rs index 2ecab262413..205649b12bb 100644 --- a/src/bootstrap/src/core/builder/cargo.rs +++ b/src/bootstrap/src/core/builder/cargo.rs @@ -323,8 +323,15 @@ impl Cargo { let cc = ccacheify(&builder.cc(target)); self.command.env(format!("CC_{triple_underscored}"), &cc); - let cflags = builder.cflags(target, GitRepo::Rustc, CLang::C).join(" "); - self.command.env(format!("CFLAGS_{triple_underscored}"), &cflags); + // Extend `CXXFLAGS_$TARGET` with our extra flags. + let env = format!("CFLAGS_{triple_underscored}"); + let mut cflags = + builder.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::C).join(" "); + if let Ok(var) = std::env::var(&env) { + cflags.push(' '); + cflags.push_str(&var); + } + self.command.env(env, &cflags); if let Some(ar) = builder.ar(target) { let ranlib = format!("{} s", ar.display()); @@ -335,10 +342,17 @@ impl Cargo { if let Ok(cxx) = builder.cxx(target) { let cxx = ccacheify(&cxx); - let cxxflags = builder.cflags(target, GitRepo::Rustc, CLang::Cxx).join(" "); - self.command - .env(format!("CXX_{triple_underscored}"), &cxx) - .env(format!("CXXFLAGS_{triple_underscored}"), cxxflags); + self.command.env(format!("CXX_{triple_underscored}"), &cxx); + + // Extend `CXXFLAGS_$TARGET` with our extra flags. + let env = format!("CXXFLAGS_{triple_underscored}"); + let mut cxxflags = + builder.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::Cxx).join(" "); + if let Ok(var) = std::env::var(&env) { + cxxflags.push(' '); + cxxflags.push_str(&var); + } + self.command.env(&env, cxxflags); } } diff --git a/src/bootstrap/src/core/builder/tests.rs b/src/bootstrap/src/core/builder/tests.rs index 74e1ed5c637..994975ed5a8 100644 --- a/src/bootstrap/src/core/builder/tests.rs +++ b/src/bootstrap/src/core/builder/tests.rs @@ -1,5 +1,7 @@ use std::thread; +use llvm::prebuilt_llvm_config; + use super::*; use crate::Flags; use crate::core::build_steps::doc::DocumentationFormat; @@ -954,3 +956,112 @@ fn test_test_coverage() { assert_eq!(modes, expected); } } + +#[test] +fn test_prebuilt_llvm_config_path_resolution() { + fn configure(config: &str) -> Config { + Config::parse_inner( + Flags::parse(&[ + "build".to_string(), + "--dry-run".to_string(), + "--config=/does/not/exist".to_string(), + ]), + |&_| toml::from_str(&config), + ) + } + + // Removes Windows disk prefix if present + fn drop_win_disk_prefix_if_present(path: PathBuf) -> PathBuf { + let path_str = path.to_str().unwrap(); + if let Some((_, without_prefix)) = path_str.split_once(":/") { + return PathBuf::from(format!("/{}", without_prefix)); + } + + path + } + + let config = configure( + r#" + [llvm] + download-ci-llvm = false + + [build] + build = "x86_64-unknown-linux-gnu" + host = ["arm-unknown-linux-gnueabihf"] + target = ["arm-unknown-linux-gnueabihf"] + + [target.x86_64-unknown-linux-gnu] + llvm-config = "/some/path/to/llvm-config" + + [target.arm-unknown-linux-gnueabihf] + cc = "arm-linux-gnueabihf-gcc" + cxx = "arm-linux-gnueabihf-g++" + "#, + ); + + let build = Build::new(config); + let builder = Builder::new(&build); + + let expected = PathBuf::from("/some/path/to/llvm-config"); + + let actual = prebuilt_llvm_config( + &builder, + TargetSelection::from_user("arm-unknown-linux-gnueabihf"), + false, + ) + .llvm_result() + .llvm_config + .clone(); + let actual = drop_win_disk_prefix_if_present(actual); + assert_eq!(expected, actual); + + let actual = prebuilt_llvm_config(&builder, builder.config.build, false) + .llvm_result() + .llvm_config + .clone(); + let actual = drop_win_disk_prefix_if_present(actual); + assert_eq!(expected, actual); + assert_eq!(expected, actual); + + let config = configure( + r#" + [llvm] + download-ci-llvm = false + "#, + ); + + let build = Build::new(config.clone()); + let builder = Builder::new(&build); + + let actual = prebuilt_llvm_config(&builder, builder.config.build, false) + .llvm_result() + .llvm_config + .clone(); + let expected = builder + .out + .join(builder.config.build) + .join("llvm/bin") + .join(exe("llvm-config", builder.config.build)); + assert_eq!(expected, actual); + + let config = configure( + r#" + [llvm] + download-ci-llvm = true + "#, + ); + + let build = Build::new(config.clone()); + let builder = Builder::new(&build); + + let actual = prebuilt_llvm_config(&builder, builder.config.build, false) + .llvm_result() + .llvm_config + .clone(); + let expected = builder + .out + .join(builder.config.build) + .join("ci-llvm/bin") + .join(exe("llvm-config", builder.config.build)); + assert_eq!(expected, actual); +} diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index bc146eda1d5..e564a4b9751 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -249,6 +249,7 @@ impl Mode { } } +#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub enum CLang { C, Cxx, @@ -1178,9 +1179,9 @@ Executed at: {executed_at}"#, self.cc.borrow()[&target].path().into() } - /// Returns a list of flags to pass to the C compiler for the target - /// specified. - fn cflags(&self, target: TargetSelection, which: GitRepo, c: CLang) -> Vec<String> { + /// Returns C flags that `cc-rs` thinks should be enabled for the + /// specified target by default. + fn cc_handled_clags(&self, target: TargetSelection, c: CLang) -> Vec<String> { if self.config.dry_run() { return Vec::new(); } @@ -1189,14 +1190,23 @@ Executed at: {executed_at}"#, CLang::Cxx => self.cxx.borrow()[&target].clone(), }; - // Filter out -O and /O (the optimization flags) that we picked up from - // cc-rs because the build scripts will determine that for themselves. - let mut base = base - .args() + // Filter out -O and /O (the optimization flags) that we picked up + // from cc-rs, that's up to the caller to figure out. + base.args() .iter() .map(|s| s.to_string_lossy().into_owned()) .filter(|s| !s.starts_with("-O") && !s.starts_with("/O")) - .collect::<Vec<String>>(); + .collect::<Vec<String>>() + } + + /// Returns extra C flags that `cc-rs` doesn't handle. + fn cc_unhandled_cflags( + &self, + target: TargetSelection, + which: GitRepo, + c: CLang, + ) -> Vec<String> { + let mut base = Vec::new(); // If we're compiling C++ on macOS then we add a flag indicating that // we want libc++ (more filled out than libstdc++), ensuring that diff --git a/src/bootstrap/src/utils/cc_detect.rs b/src/bootstrap/src/utils/cc_detect.rs index 4aec554b432..f6afd50afce 100644 --- a/src/bootstrap/src/utils/cc_detect.rs +++ b/src/bootstrap/src/utils/cc_detect.rs @@ -142,7 +142,8 @@ pub fn find_target(build: &Build, target: TargetSelection) { }; build.cc.borrow_mut().insert(target, compiler.clone()); - let cflags = build.cflags(target, GitRepo::Rustc, CLang::C); + let mut cflags = build.cc_handled_clags(target, CLang::C); + cflags.extend(build.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::C)); // If we use llvm-libunwind, we will need a C++ compiler as well for all targets // We'll need one anyways if the target triple is also a host triple @@ -168,7 +169,8 @@ pub fn find_target(build: &Build, target: TargetSelection) { build.verbose(|| println!("CC_{} = {:?}", target.triple, build.cc(target))); build.verbose(|| println!("CFLAGS_{} = {cflags:?}", target.triple)); if let Ok(cxx) = build.cxx(target) { - let cxxflags = build.cflags(target, GitRepo::Rustc, CLang::Cxx); + let mut cxxflags = build.cc_handled_clags(target, CLang::Cxx); + cxxflags.extend(build.cc_unhandled_cflags(target, GitRepo::Rustc, CLang::Cxx)); build.verbose(|| println!("CXX_{} = {cxx:?}", target.triple)); build.verbose(|| println!("CXXFLAGS_{} = {cxxflags:?}", target.triple)); } diff --git a/src/ci/docker/host-x86_64/dist-loongarch64-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-loongarch64-linux/Dockerfile index 71eb72686b0..e3f23149284 100644 --- a/src/ci/docker/host-x86_64/dist-loongarch64-linux/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-loongarch64-linux/Dockerfile @@ -3,8 +3,8 @@ FROM ubuntu:22.04 COPY scripts/cross-apt-packages.sh /scripts/ RUN sh /scripts/cross-apt-packages.sh -COPY scripts/crosstool-ng-git.sh /scripts/ -RUN sh /scripts/crosstool-ng-git.sh +COPY scripts/crosstool-ng.sh /scripts/ +RUN sh /scripts/crosstool-ng.sh COPY scripts/rustbuild-setup.sh /scripts/ RUN sh /scripts/rustbuild-setup.sh diff --git a/src/ci/docker/host-x86_64/dist-loongarch64-musl/Dockerfile b/src/ci/docker/host-x86_64/dist-loongarch64-musl/Dockerfile index 5081f25e567..2c33b5526ee 100644 --- a/src/ci/docker/host-x86_64/dist-loongarch64-musl/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-loongarch64-musl/Dockerfile @@ -3,8 +3,8 @@ FROM ubuntu:22.04 COPY scripts/cross-apt-packages.sh /scripts/ RUN sh /scripts/cross-apt-packages.sh -COPY scripts/crosstool-ng-git.sh /scripts/ -RUN sh /scripts/crosstool-ng-git.sh +COPY scripts/crosstool-ng.sh /scripts/ +RUN sh /scripts/crosstool-ng.sh COPY scripts/rustbuild-setup.sh /scripts/ RUN sh /scripts/rustbuild-setup.sh diff --git a/src/ci/docker/host-x86_64/dist-powerpc64le-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-powerpc64le-linux/Dockerfile index 9d3be51d037..cb20f43cff7 100644 --- a/src/ci/docker/host-x86_64/dist-powerpc64le-linux/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-powerpc64le-linux/Dockerfile @@ -3,8 +3,8 @@ FROM ubuntu:22.04 COPY scripts/cross-apt-packages.sh /scripts/ RUN sh /scripts/cross-apt-packages.sh -COPY scripts/crosstool-ng-git.sh /scripts/ -RUN sh /scripts/crosstool-ng-git.sh +COPY scripts/crosstool-ng.sh /scripts/ +RUN sh /scripts/crosstool-ng.sh COPY scripts/rustbuild-setup.sh /scripts/ RUN sh /scripts/rustbuild-setup.sh diff --git a/src/ci/docker/scripts/crosstool-ng-git.sh b/src/ci/docker/scripts/crosstool-ng-git.sh deleted file mode 100644 index e86810ae613..00000000000 --- a/src/ci/docker/scripts/crosstool-ng-git.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -set -ex - -URL=https://github.com/crosstool-ng/crosstool-ng -REV=ed12fa68402f58e171a6f79500f73f4781fdc9e5 - -mkdir crosstool-ng -cd crosstool-ng -git init -git fetch --depth=1 ${URL} ${REV} -git reset --hard FETCH_HEAD -./bootstrap -./configure --prefix=/usr/local -make -j$(nproc) -make install -cd .. -rm -rf crosstool-ng diff --git a/src/ci/docker/scripts/crosstool-ng.sh b/src/ci/docker/scripts/crosstool-ng.sh index c3ee19b8d2c..eee912b0d75 100644 --- a/src/ci/docker/scripts/crosstool-ng.sh +++ b/src/ci/docker/scripts/crosstool-ng.sh @@ -1,7 +1,7 @@ #!/bin/sh set -ex -CT_NG=1.26.0 +CT_NG=1.27.0 url="https://github.com/crosstool-ng/crosstool-ng/archive/crosstool-ng-$CT_NG.tar.gz" curl -Lf $url | tar xzf - diff --git a/src/ci/github-actions/jobs.yml b/src/ci/github-actions/jobs.yml index 341d82599fd..8ae6d543bc5 100644 --- a/src/ci/github-actions/jobs.yml +++ b/src/ci/github-actions/jobs.yml @@ -59,6 +59,7 @@ envs: SCRIPT: ./x.py --stage 2 test --skip tests/ui --skip tests/rustdoc -- --exact RUST_CONFIGURE_ARGS: --build=x86_64-apple-darwin --enable-sanitizers --enable-profiler --set rust.jemalloc RUSTC_RETRY_LINKER_ON_SEGFAULT: 1 + # Ensure that host tooling is tested on our minimum supported macOS version. MACOSX_DEPLOYMENT_TARGET: 10.12 MACOSX_STD_DEPLOYMENT_TARGET: 10.12 SELECT_XCODE: /Applications/Xcode_15.2.app @@ -370,7 +371,9 @@ auto: SCRIPT: ./x.py dist bootstrap --include-default-paths --host=x86_64-apple-darwin --target=x86_64-apple-darwin RUST_CONFIGURE_ARGS: --enable-full-tools --enable-sanitizers --enable-profiler --set rust.jemalloc --set rust.lto=thin --set rust.codegen-units=1 RUSTC_RETRY_LINKER_ON_SEGFAULT: 1 + # Ensure that host tooling is built to support our minimum support macOS version. MACOSX_DEPLOYMENT_TARGET: 10.12 + MACOSX_STD_DEPLOYMENT_TARGET: 10.12 SELECT_XCODE: /Applications/Xcode_15.2.app NO_LLVM_ASSERTIONS: 1 NO_DEBUG_ASSERTIONS: 1 @@ -386,7 +389,10 @@ auto: # https://github.com/rust-lang/rust/issues/129069 RUST_CONFIGURE_ARGS: --enable-sanitizers --enable-profiler --set rust.jemalloc --set target.aarch64-apple-ios-macabi.sanitizers=false --set target.x86_64-apple-ios-macabi.sanitizers=false RUSTC_RETRY_LINKER_ON_SEGFAULT: 1 + # Ensure that host tooling is built to support our minimum support macOS version. + # FIXME(madsmtm): This might be redundant, as we're not building host tooling here (?) MACOSX_DEPLOYMENT_TARGET: 10.12 + MACOSX_STD_DEPLOYMENT_TARGET: 10.12 SELECT_XCODE: /Applications/Xcode_15.2.app NO_LLVM_ASSERTIONS: 1 NO_DEBUG_ASSERTIONS: 1 @@ -404,7 +410,6 @@ auto: <<: *env-x86_64-apple-tests <<: *job-macos-xl - # This target only needs to support 11.0 and up as nothing else supports the hardware - name: dist-aarch64-apple env: SCRIPT: ./x.py dist bootstrap --include-default-paths --host=aarch64-apple-darwin --target=aarch64-apple-darwin @@ -419,6 +424,8 @@ auto: RUSTC_RETRY_LINKER_ON_SEGFAULT: 1 SELECT_XCODE: /Applications/Xcode_15.4.app USE_XCODE_CLANG: 1 + # Aarch64 tooling only needs to support macOS 11.0 and up as nothing else + # supports the hardware. MACOSX_DEPLOYMENT_TARGET: 11.0 MACOSX_STD_DEPLOYMENT_TARGET: 11.0 NO_LLVM_ASSERTIONS: 1 @@ -428,7 +435,6 @@ auto: CODEGEN_BACKENDS: llvm,cranelift <<: *job-macos-m1 - # This target only needs to support 11.0 and up as nothing else supports the hardware - name: aarch64-apple env: SCRIPT: ./x.py --stage 2 test --host=aarch64-apple-darwin --target=aarch64-apple-darwin @@ -439,6 +445,8 @@ auto: RUSTC_RETRY_LINKER_ON_SEGFAULT: 1 SELECT_XCODE: /Applications/Xcode_15.4.app USE_XCODE_CLANG: 1 + # Aarch64 tooling only needs to support macOS 11.0 and up as nothing else + # supports the hardware, so only need to test it there. MACOSX_DEPLOYMENT_TARGET: 11.0 MACOSX_STD_DEPLOYMENT_TARGET: 11.0 NO_LLVM_ASSERTIONS: 1 diff --git a/src/tools/enzyme b/src/tools/enzyme -Subproject 0e5fa4a3d475f4dece489c9e06b11164f83789f +Subproject 7f3b207c4413c9d715fd54b36b8a8fd3179e0b6 diff --git a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs index 0d0d79098d5..b6c5522b3d2 100644 --- a/src/tools/miri/tests/pass/intrinsics/portable-simd.rs +++ b/src/tools/miri/tests/pass/intrinsics/portable-simd.rs @@ -299,27 +299,6 @@ fn simd_mask() { } } - // This used to cause an ICE. It exercises simd_select_bitmask with an array as input. - let bitmask = u8x4::from_array([0b00001101, 0, 0, 0]); - assert_eq!( - mask32x4::from_bitmask_vector(bitmask), - mask32x4::from_array([true, false, true, true]), - ); - let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!( - mask32x8::from_bitmask_vector(bitmask), - mask32x8::from_array([true, false, true, false, false, false, true, false]), - ); - let bitmask = - u8x16::from_array([0b01000101, 0b11110000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); - assert_eq!( - mask32x16::from_bitmask_vector(bitmask), - mask32x16::from_array([ - true, false, true, false, false, false, true, false, false, false, false, false, true, - true, true, true, - ]), - ); - // Also directly call simd_select_bitmask, to test both kinds of argument types. unsafe { // These masks are exactly the results we got out above in the `simd_bitmask` tests. diff --git a/tests/codegen/autodiff.rs b/tests/codegen/autodiff.rs new file mode 100644 index 00000000000..abf7fcf3e4b --- /dev/null +++ b/tests/codegen/autodiff.rs @@ -0,0 +1,33 @@ +//@ compile-flags: -C opt-level=3 -Clto=fat +//@ no-prefer-dynamic +//@ needs-enzyme +#![feature(autodiff)] + +use std::autodiff::autodiff; + +#[autodiff(d_square, Reverse, Duplicated, Active)] +#[no_mangle] +fn square(x: &f64) -> f64 { + x * x +} + +// CHECK:define internal fastcc double @diffesquare(double %x.0.val, ptr nocapture align 8 %"x'" +// CHECK-NEXT:invertstart: +// CHECK-NEXT: %_0 = fmul double %x.0.val, %x.0.val +// CHECK-NEXT: %0 = fadd fast double %x.0.val, %x.0.val +// CHECK-NEXT: %1 = load double, ptr %"x'", align 8 +// CHECK-NEXT: %2 = fadd fast double %1, %0 +// CHECK-NEXT: store double %2, ptr %"x'", align 8 +// CHECK-NEXT: ret double %_0 +// CHECK-NEXT:} + +fn main() { + let x = 3.0; + let output = square(&x); + assert_eq!(9.0, output); + + let mut df_dx = 0.0; + let output_ = d_square(&x, &mut df_dx, 1.0); + assert_eq!(output, output_); + assert_eq!(6.0, df_dx); +} diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-abort.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-abort.diff index 5a830254f61..2c89670dcf7 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-abort.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-abort.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-unwind.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-unwind.diff index c11368a347c..8fecfe224cc 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-unwind.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.32bit.panic-unwind.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-abort.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-abort.diff index 037ed02ce65..976ea252c2f 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-abort.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-abort.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-unwind.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-unwind.diff index 86351c78759..6c59f5e3e2e 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-unwind.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.DataflowConstProp.64bit.panic-unwind.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-abort.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-abort.diff index 20a3897a934..1f9cf6d6aca 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-abort.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-abort.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-unwind.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-unwind.diff index 2e396301fd0..a8760285fac 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-unwind.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.32bit.panic-unwind.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-abort.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-abort.diff index 319691174cf..c398ae70a1a 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-abort.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-abort.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-unwind.diff b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-unwind.diff index 5dafc89d53f..02934c02587 100644 --- a/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-unwind.diff +++ b/tests/mir-opt/dataflow-const-prop/default_boxed_slice.main.GVN.64bit.panic-unwind.diff @@ -26,7 +26,7 @@ } scope 11 (inlined NonZero::<usize>::get) { } - scope 12 (inlined without_provenance::<[bool; 0]>) { + scope 12 (inlined std::ptr::without_provenance::<[bool; 0]>) { scope 13 (inlined without_provenance_mut::<[bool; 0]>) { } } diff --git a/tests/mir-opt/gvn_ptr_eq_with_constant.main.GVN.diff b/tests/mir-opt/gvn_ptr_eq_with_constant.main.GVN.diff index 8e7964297d0..f56af33ea60 100644 --- a/tests/mir-opt/gvn_ptr_eq_with_constant.main.GVN.diff +++ b/tests/mir-opt/gvn_ptr_eq_with_constant.main.GVN.diff @@ -16,7 +16,7 @@ } scope 8 (inlined NonZero::<usize>::get) { } - scope 9 (inlined without_provenance::<u8>) { + scope 9 (inlined std::ptr::without_provenance::<u8>) { scope 10 (inlined without_provenance_mut::<u8>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-abort.mir b/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-abort.mir index 496ec78fd8d..b7a9b4a1fe0 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-abort.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-abort.mir @@ -59,7 +59,7 @@ fn enumerated_loop(_1: &[T], _2: impl Fn(usize, &T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-unwind.mir b/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-unwind.mir index c4547cb888f..33dbf04d028 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-unwind.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.enumerated_loop.PreCodegen.after.panic-unwind.mir @@ -34,7 +34,7 @@ fn enumerated_loop(_1: &[T], _2: impl Fn(usize, &T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-abort.mir b/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-abort.mir index 7d011ea3347..dc13bb23c31 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-abort.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-abort.mir @@ -31,7 +31,7 @@ fn forward_loop(_1: &[T], _2: impl Fn(&T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-unwind.mir b/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-unwind.mir index 75e6542a3a4..3f1e0e0f746 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-unwind.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.forward_loop.PreCodegen.after.panic-unwind.mir @@ -31,7 +31,7 @@ fn forward_loop(_1: &[T], _2: impl Fn(&T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-abort.mir b/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-abort.mir index 41bc91ab028..4b7ab4516d2 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-abort.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-abort.mir @@ -34,7 +34,7 @@ fn reverse_loop(_1: &[T], _2: impl Fn(&T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-unwind.mir b/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-unwind.mir index 6ed8ef9715b..b2c15247cd7 100644 --- a/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-unwind.mir +++ b/tests/mir-opt/pre-codegen/slice_iter.reverse_loop.PreCodegen.after.panic-unwind.mir @@ -34,7 +34,7 @@ fn reverse_loop(_1: &[T], _2: impl Fn(&T)) -> () { let _9: *const T; scope 7 { } - scope 12 (inlined without_provenance::<T>) { + scope 12 (inlined std::ptr::without_provenance::<T>) { scope 13 (inlined without_provenance_mut::<T>) { } } diff --git a/tests/run-make/apple-deployment-target/rmake.rs b/tests/run-make/apple-deployment-target/rmake.rs index 0ae95cb1f4b..839e21b7496 100644 --- a/tests/run-make/apple-deployment-target/rmake.rs +++ b/tests/run-make/apple-deployment-target/rmake.rs @@ -7,7 +7,11 @@ //@ only-apple -use run_make_support::{apple_os, cmd, run_in_tmpdir, rustc, target}; +use std::collections::HashSet; + +use run_make_support::{ + apple_os, cmd, has_extension, path, regex, run_in_tmpdir, rustc, shallow_find_files, target, +}; /// Run vtool to check the `minos` field in LC_BUILD_VERSION. /// @@ -166,4 +170,21 @@ fn main() { rustc().env_remove(env_var).run(); minos("foo.o", default_version); }); + + // Test that all binaries in rlibs produced by `rustc` have the same version. + // Regression test for https://github.com/rust-lang/rust/issues/128419. + let sysroot = rustc().print("sysroot").run().stdout_utf8(); + let target_sysroot = path(sysroot.trim()).join("lib/rustlib").join(target()).join("lib"); + let rlibs = shallow_find_files(&target_sysroot, |path| has_extension(path, "rlib")); + + let output = cmd("otool").arg("-l").args(rlibs).run().stdout_utf8(); + let re = regex::Regex::new(r"(minos|version) ([0-9.]*)").unwrap(); + let mut versions = HashSet::new(); + for (_, [_, version]) in re.captures_iter(&output).map(|c| c.extract()) { + versions.insert(version); + } + // FIXME(madsmtm): See above for aarch64-apple-watchos. + if versions.len() != 1 && target() != "aarch64-apple-watchos" { + panic!("std rlibs contained multiple different deployment target versions: {versions:?}"); + } } |
